1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #if 0 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCAdaptiveSelection" 11 PetscErrorCode PCBDDCAdaptiveSelection(PC pc, MatNullSpace* adapt_nnsp) 12 { 13 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs[0],pcbddc->mat_graph,...);CHKERRQ(ierr); 18 PetscFunctionReturn(0); 19 } 20 #endif 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "PCBDDCSetUpSolvers" 24 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 25 { 26 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 27 PetscScalar *coarse_submat_vals; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 /* Setup local scatters R_to_B and (optionally) R_to_D */ 32 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 33 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 34 35 /* Setup local neumann solver ksp_R */ 36 /* PCBDDCSetUpLocalScatters should be called first! */ 37 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 38 39 /* Change global null space passed in by the user if change of basis has been requested */ 40 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 41 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 42 } 43 44 /* 45 Setup local correction and local part of coarse basis. 46 Gives back the dense local part of the coarse matrix in column major ordering 47 */ 48 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 49 50 /* Compute total number of coarse nodes and setup coarse solver */ 51 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 52 53 /* free */ 54 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCResetCustomization" 60 PetscErrorCode PCBDDCResetCustomization(PC pc) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 67 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 68 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 70 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 71 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 72 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 73 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 74 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 75 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 76 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetTopography" 82 PetscErrorCode PCBDDCResetTopography(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 89 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 91 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 92 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "PCBDDCResetSolvers" 98 PetscErrorCode PCBDDCResetSolvers(PC pc) 99 { 100 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 101 PetscErrorCode ierr; 102 103 PetscFunctionBegin; 104 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 105 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 106 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 110 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 112 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 113 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 114 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 115 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 116 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 117 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 118 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 119 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 120 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 121 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 122 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 123 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 124 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 125 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 126 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 127 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 128 PetscFunctionReturn(0); 129 } 130 131 #undef __FUNCT__ 132 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 133 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 134 { 135 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 136 PC_IS *pcis = (PC_IS*)pc->data; 137 VecType impVecType; 138 PetscInt n_constraints,n_R,old_size; 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 if (!pcbddc->ConstraintMatrix) { 143 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 144 } 145 /* get sizes */ 146 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 147 n_R = pcis->n-pcbddc->n_actual_vertices; 148 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 149 /* local work vectors (try to avoid unneeded work)*/ 150 /* R nodes */ 151 old_size = -1; 152 if (pcbddc->vec1_R) { 153 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 154 } 155 if (n_R != old_size) { 156 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 157 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 158 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 159 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 160 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 161 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 162 } 163 /* local primal dofs */ 164 old_size = -1; 165 if (pcbddc->vec1_P) { 166 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 167 } 168 if (pcbddc->local_primal_size != old_size) { 169 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 170 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 171 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 172 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 173 } 174 /* local explicit constraints */ 175 old_size = -1; 176 if (pcbddc->vec1_C) { 177 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 178 } 179 if (n_constraints && n_constraints != old_size) { 180 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 181 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 182 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 183 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 #undef __FUNCT__ 189 #define __FUNCT__ "PCBDDCSetUpCorrection" 190 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 191 { 192 PetscErrorCode ierr; 193 /* pointers to pcis and pcbddc */ 194 PC_IS* pcis = (PC_IS*)pc->data; 195 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 196 /* submatrices of local problem */ 197 Mat A_RV,A_VR,A_VV; 198 /* working matrices */ 199 Mat M1,M2,M3,C_CR; 200 /* working vectors */ 201 Vec vec1_C,vec2_C,vec1_V,vec2_V; 202 /* additional working stuff */ 203 IS is_aux; 204 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 205 const PetscScalar *array,*row_cmat_values; 206 const PetscInt *row_cmat_indices,*idx_R_local; 207 PetscInt *idx_V_B,*auxindices; 208 PetscInt n_vertices,n_constraints,size_of_constraint; 209 PetscInt i,j,n_R,n_D,n_B; 210 PetscBool unsymmetric_check; 211 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 212 MatType impMatType; 213 /* some shortcuts to scalars */ 214 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 215 /* for debugging purposes */ 216 PetscReal *coarsefunctions_errors,*constraints_errors; 217 218 PetscFunctionBegin; 219 /* get number of vertices (corners plus constraints with change of basis) 220 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 221 n_vertices = pcbddc->n_actual_vertices; 222 n_constraints = pcbddc->local_primal_size-n_vertices; 223 /* Set Non-overlapping dimensions */ 224 n_B = pcis->n_B; n_D = pcis->n - n_B; 225 n_R = pcis->n-n_vertices; 226 227 /* Set types for local objects needed by BDDC precondtioner */ 228 impMatType = MATSEQDENSE; 229 230 /* Allocating some extra storage just to be safe */ 231 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 232 for (i=0;i<pcis->n;i++) auxindices[i]=i; 233 234 /* vertices in boundary numbering */ 235 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 236 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 237 if (i != n_vertices) { 238 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 239 } 240 241 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 242 if (n_constraints) { 243 /* see if we can save some allocations */ 244 if (pcbddc->local_auxmat2) { 245 PetscInt on_R,on_constraints; 246 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 247 if (on_R != n_R || on_constraints != n_constraints) { 248 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 249 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 250 } 251 } 252 /* work vectors */ 253 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 254 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 255 /* auxiliary matrices */ 256 if (!pcbddc->local_auxmat2) { 257 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 258 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 259 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 260 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 261 } 262 263 /* Extract constraints on R nodes: C_{CR} */ 264 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 265 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 266 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 267 268 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 269 for (i=0;i<n_constraints;i++) { 270 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 271 /* Get row of constraint matrix in R numbering */ 272 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 273 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 274 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 275 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 276 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 277 /* Solve for row of constraint matrix in R numbering */ 278 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 279 /* Set values in local_auxmat2 */ 280 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 281 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 282 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 283 } 284 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 285 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 287 288 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 289 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 290 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 291 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 292 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 293 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 294 ierr = MatSetUp(M1);CHKERRQ(ierr); 295 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 296 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 297 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 298 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 299 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 300 ierr = MatDestroy(&M2);CHKERRQ(ierr); 301 ierr = MatDestroy(&M3);CHKERRQ(ierr); 302 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 303 if (!pcbddc->local_auxmat1) { 304 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 305 } else { 306 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 307 } 308 } 309 310 /* Get submatrices from subdomain matrix */ 311 if (n_vertices) { 312 PetscInt ibs,mbs; 313 PetscBool issbaij; 314 Mat newmat; 315 316 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 317 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 318 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 319 if (ibs != mbs) { /* need to convert to SEQAIJ */ 320 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 321 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 324 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 325 } else { 326 /* this is safe */ 327 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 328 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 329 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 330 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 331 /* which of the two approaches is faster? */ 332 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 333 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 334 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 335 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 336 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 337 } else { 338 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 339 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 340 } 341 } 342 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 343 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 344 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 345 } 346 347 /* Matrix of coarse basis functions (local) */ 348 if (pcbddc->coarse_phi_B) { 349 PetscInt on_B,on_primal; 350 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 351 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 352 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 354 } 355 } 356 if (pcbddc->coarse_phi_D) { 357 PetscInt on_D,on_primal; 358 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 359 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 360 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 361 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 362 } 363 } 364 if (!pcbddc->coarse_phi_B) { 365 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 366 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 367 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 368 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 369 } 370 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 371 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 372 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 373 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 374 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 375 } 376 377 if (pcbddc->dbg_flag) { 378 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 379 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 380 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 381 } 382 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 383 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 384 385 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 386 387 /* vertices */ 388 for (i=0;i<n_vertices;i++) { 389 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 390 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 391 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 392 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 393 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 394 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 395 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 396 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 397 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 398 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 399 if (n_constraints) { 400 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 401 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 402 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 403 } 404 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 405 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 406 407 /* Set values in coarse basis function and subdomain part of coarse_mat */ 408 /* coarse basis functions */ 409 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 410 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 411 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 413 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 415 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 416 if (pcbddc->switch_static || pcbddc->dbg_flag) { 417 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 418 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 419 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 422 } 423 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 424 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 425 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 426 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 427 if (n_constraints) { 428 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 429 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 430 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 431 } 432 433 /* check */ 434 if (pcbddc->dbg_flag) { 435 /* assemble subdomain vector on local nodes */ 436 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 437 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 438 if (n_R) { 439 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 440 } 441 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 442 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 443 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 444 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 445 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 446 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 447 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 448 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 449 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 450 if (n_constraints) { 451 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 452 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 453 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 454 } 455 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 456 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 458 /* check saddle point solution */ 459 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 460 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 461 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 463 /* shift by the identity matrix */ 464 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 465 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 466 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 468 } 469 } 470 471 /* constraints */ 472 for (i=0;i<n_constraints;i++) { 473 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 474 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 476 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 477 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 478 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 479 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 480 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 481 if (n_vertices) { 482 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 483 } 484 /* Set values in coarse basis function and subdomain part of coarse_mat */ 485 /* coarse basis functions */ 486 j = i+n_vertices; /* don't touch this! */ 487 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 488 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 489 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 491 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 492 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 493 if (pcbddc->switch_static || pcbddc->dbg_flag) { 494 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 495 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 497 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 499 } 500 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 501 if (n_vertices) { 502 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 503 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 504 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 } 506 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 507 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 508 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 510 if (pcbddc->dbg_flag) { 511 /* assemble subdomain vector on nodes */ 512 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 513 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 514 if (n_R) { 515 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 516 } 517 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 518 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 519 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 520 /* assemble subdomain vector of lagrange multipliers */ 521 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 522 if (n_vertices) { 523 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 524 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 525 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 526 } 527 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 528 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 529 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 530 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 531 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 532 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 533 /* check saddle point solution */ 534 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 535 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 536 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 537 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 538 /* shift by the identity matrix */ 539 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 540 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 541 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 543 } 544 } 545 /* call assembling routines for local coarse basis */ 546 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 if (pcbddc->switch_static || pcbddc->dbg_flag) { 549 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } 552 553 /* compute other basis functions for non-symmetric problems */ 554 if (!pcbddc->issym) { 555 if (!pcbddc->coarse_psi_B) { 556 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 557 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 558 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 559 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 560 } 561 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 562 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 563 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 564 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 565 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 566 } 567 for (i=0;i<pcbddc->local_primal_size;i++) { 568 if (n_constraints) { 569 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 570 for (j=0;j<n_constraints;j++) { 571 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 572 } 573 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 574 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 575 } 576 if (i<n_vertices) { 577 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 578 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 579 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 580 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 581 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 582 if (n_constraints) { 583 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 584 } 585 } else { 586 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 587 } 588 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 589 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 590 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 591 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 593 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 594 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 595 if (i<n_vertices) { 596 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 597 } 598 if (pcbddc->switch_static || pcbddc->dbg_flag) { 599 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 600 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 601 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 602 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 603 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 604 } 605 606 if (pcbddc->dbg_flag) { 607 /* assemble subdomain vector on nodes */ 608 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 609 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 610 if (n_R) { 611 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 612 } 613 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 614 if (i<n_vertices) { 615 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 616 } 617 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 618 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 619 /* assemble subdomain vector of lagrange multipliers */ 620 for (j=0;j<pcbddc->local_primal_size;j++) { 621 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 622 } 623 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 624 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 625 /* check saddle point solution */ 626 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 627 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 628 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 629 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 630 /* shift by the identity matrix */ 631 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 632 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 633 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 634 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 635 } 636 } 637 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 638 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 639 if (pcbddc->switch_static || pcbddc->dbg_flag) { 640 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 641 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 642 } 643 unsymmetric_check = PETSC_TRUE; 644 } else { /* take references to already computed coarse basis */ 645 unsymmetric_check = PETSC_FALSE; 646 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 647 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 648 if (pcbddc->coarse_phi_D) { 649 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 650 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 651 } 652 } 653 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 654 /* Checking coarse_sub_mat and coarse basis functios */ 655 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 656 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 657 if (pcbddc->dbg_flag) { 658 Mat coarse_sub_mat; 659 Mat AUXMAT,TM1,TM2,TM3,TM4; 660 Mat coarse_phi_D,coarse_phi_B; 661 Mat coarse_psi_D,coarse_psi_B; 662 Mat A_II,A_BB,A_IB,A_BI; 663 MatType checkmattype=MATSEQAIJ; 664 PetscReal real_value; 665 666 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 667 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 668 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 669 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 670 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 671 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 672 if (unsymmetric_check) { 673 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 674 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 675 } 676 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 677 678 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 679 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 680 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 681 if (unsymmetric_check) { 682 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 683 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 684 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 685 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 686 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 687 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 688 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 689 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 690 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 691 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 692 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 693 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 694 } else { 695 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 696 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 697 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 698 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 699 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 700 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 701 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 702 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 703 } 704 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 705 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 706 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 707 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 708 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 709 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 710 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 711 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 715 for (i=0;i<pcbddc->local_primal_size;i++) { 716 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 717 } 718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 719 for (i=0;i<pcbddc->local_primal_size;i++) { 720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 721 } 722 if (unsymmetric_check) { 723 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 724 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 725 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 726 } 727 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 728 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 729 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 730 } 731 } 732 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 733 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 734 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 737 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 738 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 739 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 740 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 741 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 742 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 743 if (unsymmetric_check) { 744 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 745 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 746 } 747 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 749 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 750 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 751 } 752 /* free memory */ 753 if (n_vertices) { 754 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 755 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 756 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 757 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 758 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 759 } 760 if (n_constraints) { 761 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 762 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 763 ierr = MatDestroy(&M1);CHKERRQ(ierr); 764 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 765 } 766 ierr = PetscFree(auxindices);CHKERRQ(ierr); 767 /* get back data */ 768 *coarse_submat_vals_n = coarse_submat_vals; 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatGetSubMatrixUnsorted" 774 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B) 775 { 776 Mat *work_mat; 777 IS isrow_s,iscol_s; 778 PetscBool rsorted,csorted; 779 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 780 PetscErrorCode ierr; 781 782 PetscFunctionBegin; 783 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 784 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 785 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 786 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 787 788 if (!rsorted) { 789 const PetscInt *idxs; 790 PetscInt *idxs_sorted,i; 791 792 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 793 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 794 for (i=0;i<rsize;i++) { 795 idxs_perm_r[i] = i; 796 } 797 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 798 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 799 for (i=0;i<rsize;i++) { 800 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 801 } 802 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 803 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 804 } else { 805 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 806 isrow_s = isrow; 807 } 808 809 if (!csorted) { 810 if (isrow == iscol) { 811 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 812 iscol_s = isrow_s; 813 } else { 814 const PetscInt *idxs; 815 PetscInt *idxs_sorted,i; 816 817 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 818 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 819 for (i=0;i<csize;i++) { 820 idxs_perm_c[i] = i; 821 } 822 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 823 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 824 for (i=0;i<csize;i++) { 825 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 826 } 827 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 828 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 829 } 830 } else { 831 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 832 iscol_s = iscol; 833 } 834 835 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr); 836 837 if (!rsorted || !csorted) { 838 Mat new_mat; 839 IS is_perm_r,is_perm_c; 840 841 if (!rsorted) { 842 PetscInt *idxs_r,i; 843 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 844 for (i=0;i<rsize;i++) { 845 idxs_r[idxs_perm_r[i]] = i; 846 } 847 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 848 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 849 } else { 850 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 851 } 852 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 853 854 if (!csorted) { 855 if (isrow_s == iscol_s) { 856 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 857 is_perm_c = is_perm_r; 858 } else { 859 PetscInt *idxs_c,i; 860 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 861 for (i=0;i<csize;i++) { 862 idxs_c[idxs_perm_c[i]] = i; 863 } 864 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 865 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 866 } 867 } else { 868 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 869 } 870 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 871 872 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 873 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 874 work_mat[0] = new_mat; 875 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 876 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 877 } 878 879 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 880 *B = work_mat[0]; 881 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 882 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 883 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 884 PetscFunctionReturn(0); 885 } 886 887 #undef __FUNCT__ 888 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 889 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 890 { 891 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 892 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 893 Mat new_mat; 894 IS is_local,is_global; 895 PetscInt local_size; 896 PetscBool isseqaij; 897 PetscErrorCode ierr; 898 899 PetscFunctionBegin; 900 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 901 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 902 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 903 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 904 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 905 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr); 906 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 907 908 /* check */ 909 if (pcbddc->dbg_flag) { 910 Vec x,x_change; 911 PetscReal error; 912 913 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 914 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 915 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 916 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 919 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 920 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 921 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 922 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 923 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 924 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 925 ierr = VecDestroy(&x);CHKERRQ(ierr); 926 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 927 } 928 929 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 930 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 931 if (isseqaij) { 932 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 933 } else { 934 Mat work_mat; 935 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 936 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 937 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 938 } 939 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 940 /* 941 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 942 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 943 */ 944 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 950 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 951 { 952 PC_IS* pcis = (PC_IS*)(pc->data); 953 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 954 IS is_aux1,is_aux2; 955 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 956 PetscInt n_vertices,i,j,n_R,n_D,n_B; 957 PetscInt vbs,bs; 958 PetscBT bitmask; 959 PetscErrorCode ierr; 960 961 PetscFunctionBegin; 962 /* 963 No need to setup local scatters if 964 - primal space is unchanged 965 AND 966 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 967 AND 968 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 969 */ 970 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 971 PetscFunctionReturn(0); 972 } 973 /* destroy old objects */ 974 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 975 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 977 /* Set Non-overlapping dimensions */ 978 n_B = pcis->n_B; n_D = pcis->n - n_B; 979 n_vertices = pcbddc->n_actual_vertices; 980 /* create auxiliary bitmask */ 981 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 982 for (i=0;i<n_vertices;i++) { 983 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 984 } 985 986 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 987 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 988 for (i=0, n_R=0; i<pcis->n; i++) { 989 if (!PetscBTLookup(bitmask,i)) { 990 idx_R_local[n_R] = i; 991 n_R++; 992 } 993 } 994 995 /* Block code */ 996 vbs = 1; 997 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 998 if (bs>1 && !(n_vertices%bs)) { 999 PetscBool is_blocked = PETSC_TRUE; 1000 PetscInt *vary; 1001 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1002 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1003 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1004 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1005 for (i=0; i<n_vertices; i++) { 1006 if (vary[i]!=0 && vary[i]!=bs) { 1007 is_blocked = PETSC_FALSE; 1008 break; 1009 } 1010 } 1011 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1012 vbs = bs; 1013 for (i=0;i<n_R/vbs;i++) { 1014 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1015 } 1016 } 1017 ierr = PetscFree(vary);CHKERRQ(ierr); 1018 } 1019 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1020 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1021 1022 /* print some info if requested */ 1023 if (pcbddc->dbg_flag) { 1024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1025 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1026 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1027 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1032 } 1033 1034 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1035 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1036 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1038 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1039 for (i=0; i<n_D; i++) { 1040 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1041 } 1042 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1043 for (i=0, j=0; i<n_R; i++) { 1044 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1045 aux_array1[j++] = i; 1046 } 1047 } 1048 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1049 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1050 for (i=0, j=0; i<n_B; i++) { 1051 if (!PetscBTLookup(bitmask,is_indices[i])) { 1052 aux_array2[j++] = i; 1053 } 1054 } 1055 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1056 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1057 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1058 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1059 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1060 1061 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1062 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1063 for (i=0, j=0; i<n_R; i++) { 1064 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1065 aux_array1[j++] = i; 1066 } 1067 } 1068 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1069 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1070 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1071 } 1072 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1073 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1080 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1081 { 1082 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1083 PC_IS *pcis = (PC_IS*)pc->data; 1084 PC pc_temp; 1085 Mat A_RR; 1086 MatReuse reuse; 1087 PetscScalar m_one = -1.0; 1088 PetscReal value; 1089 PetscInt n_D,n_R,ibs,mbs; 1090 PetscBool use_exact,use_exact_reduced,issbaij; 1091 PetscErrorCode ierr; 1092 /* prefixes stuff */ 1093 char dir_prefix[256],neu_prefix[256],str_level[16]; 1094 size_t len; 1095 1096 PetscFunctionBegin; 1097 1098 /* compute prefixes */ 1099 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1100 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1101 if (!pcbddc->current_level) { 1102 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1103 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1104 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1105 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1106 } else { 1107 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1108 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1109 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1110 len -= 15; /* remove "pc_bddc_coarse_" */ 1111 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1112 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1113 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1114 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1115 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1116 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1117 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1118 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1119 } 1120 1121 /* DIRICHLET PROBLEM */ 1122 if (dirichlet) { 1123 /* Matrix for Dirichlet problem is pcis->A_II */ 1124 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1125 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1126 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1127 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1128 /* default */ 1129 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1130 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1131 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1132 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1133 if (issbaij) { 1134 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1135 } else { 1136 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1137 } 1138 /* Allow user's customization */ 1139 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1140 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1141 } 1142 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1143 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1144 if (!n_D) { 1145 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1146 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1147 } 1148 /* Set Up KSP for Dirichlet problem of BDDC */ 1149 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1150 /* set ksp_D into pcis data */ 1151 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1152 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1153 pcis->ksp_D = pcbddc->ksp_D; 1154 } 1155 1156 /* NEUMANN PROBLEM */ 1157 A_RR = 0; 1158 if (neumann) { 1159 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1160 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1161 if (pcbddc->ksp_R) { /* already created ksp */ 1162 PetscInt nn_R; 1163 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1164 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1165 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1166 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1167 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1168 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1169 reuse = MAT_INITIAL_MATRIX; 1170 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1171 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1172 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1173 reuse = MAT_INITIAL_MATRIX; 1174 } else { /* safe to reuse the matrix */ 1175 reuse = MAT_REUSE_MATRIX; 1176 } 1177 } 1178 /* last check */ 1179 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1180 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1181 reuse = MAT_INITIAL_MATRIX; 1182 } 1183 } else { /* first time, so we need to create the matrix */ 1184 reuse = MAT_INITIAL_MATRIX; 1185 } 1186 /* extract A_RR */ 1187 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1188 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1189 if (ibs != mbs) { 1190 Mat newmat; 1191 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1192 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1193 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1194 } else { 1195 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1196 } 1197 if (!pcbddc->ksp_R) { /* create object if not present */ 1198 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1199 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1200 /* default */ 1201 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1202 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1203 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1204 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1205 if (issbaij) { 1206 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1207 } else { 1208 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1209 } 1210 /* Allow user's customization */ 1211 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1212 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1213 } 1214 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1215 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1216 if (!n_R) { 1217 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1218 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1219 } 1220 /* Set Up KSP for Neumann problem of BDDC */ 1221 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1222 } 1223 1224 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1225 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1226 if (pcbddc->dbg_flag) { 1227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1228 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1229 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1230 } 1231 if (dirichlet) { /* Dirichlet */ 1232 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1233 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1234 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1235 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1236 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1237 /* need to be adapted? */ 1238 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1239 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1240 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1241 /* print info */ 1242 if (pcbddc->dbg_flag) { 1243 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1244 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1245 } 1246 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1247 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1248 } 1249 } 1250 if (neumann) { /* Neumann */ 1251 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1252 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1253 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1254 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1255 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1256 /* need to be adapted? */ 1257 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1258 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1259 /* print info */ 1260 if (pcbddc->dbg_flag) { 1261 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1262 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1263 } 1264 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1265 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1266 } 1267 } 1268 } 1269 /* free Neumann problem's matrix */ 1270 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #undef __FUNCT__ 1275 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1276 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1277 { 1278 PetscErrorCode ierr; 1279 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1280 1281 PetscFunctionBegin; 1282 if (applytranspose) { 1283 if (pcbddc->local_auxmat1) { 1284 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1285 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1286 } 1287 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1288 } else { 1289 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1290 if (pcbddc->local_auxmat1) { 1291 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1292 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1293 } 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1301 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1302 { 1303 PetscErrorCode ierr; 1304 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1305 PC_IS* pcis = (PC_IS*) (pc->data); 1306 const PetscScalar zero = 0.0; 1307 1308 PetscFunctionBegin; 1309 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1310 if (applytranspose) { 1311 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1312 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1313 } else { 1314 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1315 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1316 } 1317 /* start communications from local primal nodes to rhs of coarse solver */ 1318 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1319 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1320 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 1322 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1323 /* TODO remove null space when doing multilevel */ 1324 if (pcbddc->coarse_ksp) { 1325 if (applytranspose) { 1326 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1327 } else { 1328 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1329 } 1330 } 1331 1332 /* Local solution on R nodes */ 1333 if (pcis->n) { 1334 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1335 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1336 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1337 if (pcbddc->switch_static) { 1338 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1339 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1340 } 1341 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1342 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1343 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1344 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345 if (pcbddc->switch_static) { 1346 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1347 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1348 } 1349 } 1350 1351 /* communications from coarse sol to local primal nodes */ 1352 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1353 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354 1355 /* Sum contributions from two levels */ 1356 if (applytranspose) { 1357 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1358 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1359 } else { 1360 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1361 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1369 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1370 { 1371 PetscErrorCode ierr; 1372 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1373 PetscScalar *array,*array2; 1374 Vec from,to; 1375 1376 PetscFunctionBegin; 1377 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1378 from = pcbddc->coarse_vec; 1379 to = pcbddc->vec1_P; 1380 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1381 Vec tvec; 1382 PetscInt lsize; 1383 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1384 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1385 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1386 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1387 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1388 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1389 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1390 } 1391 } else { /* from local to global -> put data in coarse right hand side */ 1392 from = pcbddc->vec1_P; 1393 to = pcbddc->coarse_vec; 1394 } 1395 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1396 PetscFunctionReturn(0); 1397 } 1398 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1401 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1402 { 1403 PetscErrorCode ierr; 1404 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1405 PetscScalar *array,*array2; 1406 Vec from,to; 1407 1408 PetscFunctionBegin; 1409 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1410 from = pcbddc->coarse_vec; 1411 to = pcbddc->vec1_P; 1412 } else { /* from local to global -> put data in coarse right hand side */ 1413 from = pcbddc->vec1_P; 1414 to = pcbddc->coarse_vec; 1415 } 1416 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1417 if (smode == SCATTER_FORWARD) { 1418 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1419 Vec tvec; 1420 PetscInt lsize; 1421 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1422 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1423 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1424 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1425 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1426 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1427 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1428 } 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 /* uncomment for testing purposes */ 1434 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1437 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1438 { 1439 PetscErrorCode ierr; 1440 PC_IS* pcis = (PC_IS*)(pc->data); 1441 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1442 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1443 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1444 MatType impMatType=MATSEQAIJ; 1445 /* one and zero */ 1446 PetscScalar one=1.0,zero=0.0; 1447 /* space to store constraints and their local indices */ 1448 PetscScalar *temp_quadrature_constraint; 1449 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1450 /* iterators */ 1451 PetscInt i,j,k,total_counts,temp_start_ptr; 1452 /* stuff to store connected components stored in pcbddc->mat_graph */ 1453 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1454 PetscInt n_ISForFaces,n_ISForEdges; 1455 /* near null space stuff */ 1456 MatNullSpace nearnullsp; 1457 const Vec *nearnullvecs; 1458 Vec *localnearnullsp; 1459 PetscBool nnsp_has_cnst; 1460 PetscInt nnsp_size; 1461 PetscScalar *array; 1462 /* BLAS integers */ 1463 PetscBLASInt lwork,lierr; 1464 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1465 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1466 /* LAPACK working arrays for SVD or POD */ 1467 PetscBool skip_lapack; 1468 PetscScalar *work; 1469 PetscReal *singular_vals; 1470 #if defined(PETSC_USE_COMPLEX) 1471 PetscReal *rwork; 1472 #endif 1473 #if defined(PETSC_MISSING_LAPACK_GESVD) 1474 PetscBLASInt Blas_one_2=1; 1475 PetscScalar *temp_basis,*correlation_mat; 1476 #else 1477 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1478 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1479 #endif 1480 /* reuse */ 1481 PetscInt olocal_primal_size; 1482 PetscInt *oprimal_indices_local_idxs; 1483 /* change of basis */ 1484 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1485 PetscBool boolforchange,qr_needed; 1486 PetscBT touched,change_basis,qr_needed_idx; 1487 /* auxiliary stuff */ 1488 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1489 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1490 PetscScalar *temp_quadrature_constraint_work; 1491 /* some quantities */ 1492 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1493 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1494 1495 1496 PetscFunctionBegin; 1497 /* Destroy Mat objects computed previously */ 1498 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1499 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1500 /* Get index sets for faces, edges and vertices from graph */ 1501 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1502 /* free unneeded index sets */ 1503 if (!pcbddc->use_vertices) { 1504 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1505 } 1506 if (!pcbddc->use_edges) { 1507 for (i=0;i<n_ISForEdges;i++) { 1508 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1509 } 1510 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1511 n_ISForEdges = 0; 1512 } 1513 if (!pcbddc->use_faces) { 1514 for (i=0;i<n_ISForFaces;i++) { 1515 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1516 } 1517 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1518 n_ISForFaces = 0; 1519 } 1520 /* HACKS (the following two blocks of code) */ 1521 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1522 pcbddc->use_change_of_basis = PETSC_TRUE; 1523 if (!ISForEdges) { 1524 pcbddc->use_change_on_faces = PETSC_TRUE; 1525 } 1526 } 1527 if (pcbddc->NullSpace) { 1528 /* use_change_of_basis should be consistent among processors */ 1529 PetscBool tbool[2],gbool[2]; 1530 tbool [0] = pcbddc->use_change_of_basis; 1531 tbool [1] = pcbddc->use_change_on_faces; 1532 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1533 pcbddc->use_change_of_basis = gbool[0]; 1534 pcbddc->use_change_on_faces = gbool[1]; 1535 } 1536 /* print some info */ 1537 if (pcbddc->dbg_flag) { 1538 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1540 i = 0; 1541 if (ISForVertices) { 1542 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1543 } 1544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1548 } 1549 /* check if near null space is attached to global mat */ 1550 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1551 if (nearnullsp) { 1552 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1553 /* remove any stored info */ 1554 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1555 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1556 /* store information for BDDC solver reuse */ 1557 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1558 pcbddc->onearnullspace = nearnullsp; 1559 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1560 for (i=0;i<nnsp_size;i++) { 1561 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1562 } 1563 } else { /* if near null space is not provided BDDC uses constants by default */ 1564 nnsp_size = 0; 1565 nnsp_has_cnst = PETSC_TRUE; 1566 } 1567 /* get max number of constraints on a single cc */ 1568 max_constraints = nnsp_size; 1569 if (nnsp_has_cnst) max_constraints++; 1570 1571 /* 1572 Evaluate maximum storage size needed by the procedure 1573 - temp_indices will contain start index of each constraint stored as follows 1574 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1575 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1576 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1577 */ 1578 total_counts = n_ISForFaces+n_ISForEdges; 1579 total_counts *= max_constraints; 1580 n_vertices = 0; 1581 if (ISForVertices) { 1582 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1583 } 1584 total_counts += n_vertices; 1585 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1586 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1587 total_counts = 0; 1588 max_size_of_constraint = 0; 1589 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1590 if (i<n_ISForEdges) { 1591 used_IS = &ISForEdges[i]; 1592 } else { 1593 used_IS = &ISForFaces[i-n_ISForEdges]; 1594 } 1595 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1596 total_counts += j; 1597 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1598 } 1599 total_counts *= max_constraints; 1600 total_counts += n_vertices; 1601 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1602 /* get local part of global near null space vectors */ 1603 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1604 for (k=0;k<nnsp_size;k++) { 1605 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1606 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 } 1609 1610 /* whether or not to skip lapack calls */ 1611 skip_lapack = PETSC_TRUE; 1612 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1613 1614 /* allocate some auxiliary stuff */ 1615 if (!skip_lapack || pcbddc->use_qr_single) { 1616 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1617 } 1618 1619 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1620 if (!skip_lapack) { 1621 PetscScalar temp_work; 1622 1623 #if defined(PETSC_MISSING_LAPACK_GESVD) 1624 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1625 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1626 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1627 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1628 #if defined(PETSC_USE_COMPLEX) 1629 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1630 #endif 1631 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1632 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1633 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1634 lwork = -1; 1635 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1636 #if !defined(PETSC_USE_COMPLEX) 1637 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1638 #else 1639 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1640 #endif 1641 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1642 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1643 #else /* on missing GESVD */ 1644 /* SVD */ 1645 PetscInt max_n,min_n; 1646 max_n = max_size_of_constraint; 1647 min_n = max_constraints; 1648 if (max_size_of_constraint < max_constraints) { 1649 min_n = max_size_of_constraint; 1650 max_n = max_constraints; 1651 } 1652 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1653 #if defined(PETSC_USE_COMPLEX) 1654 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1655 #endif 1656 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1657 lwork = -1; 1658 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1659 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1660 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1661 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1662 #if !defined(PETSC_USE_COMPLEX) 1663 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1664 #else 1665 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1666 #endif 1667 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1668 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1669 #endif /* on missing GESVD */ 1670 /* Allocate optimal workspace */ 1671 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1672 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1673 } 1674 /* Now we can loop on constraining sets */ 1675 total_counts = 0; 1676 temp_indices[0] = 0; 1677 /* vertices */ 1678 if (ISForVertices) { 1679 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1680 if (nnsp_has_cnst) { /* consider all vertices */ 1681 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1682 for (i=0;i<n_vertices;i++) { 1683 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1684 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1685 total_counts++; 1686 } 1687 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1688 PetscBool used_vertex; 1689 for (i=0;i<n_vertices;i++) { 1690 used_vertex = PETSC_FALSE; 1691 k = 0; 1692 while (!used_vertex && k<nnsp_size) { 1693 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1694 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1695 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1696 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1697 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1698 total_counts++; 1699 used_vertex = PETSC_TRUE; 1700 } 1701 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1702 k++; 1703 } 1704 } 1705 } 1706 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1707 n_vertices = total_counts; 1708 } 1709 1710 /* edges and faces */ 1711 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1712 if (ncc<n_ISForEdges) { 1713 used_IS = &ISForEdges[ncc]; 1714 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1715 } else { 1716 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1717 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1718 } 1719 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1720 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1721 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1722 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1723 /* change of basis should not be performed on local periodic nodes */ 1724 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1725 if (nnsp_has_cnst) { 1726 PetscScalar quad_value; 1727 temp_constraints++; 1728 if (!pcbddc->use_nnsp_true) { 1729 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1730 } else { 1731 quad_value = 1.0; 1732 } 1733 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1734 for (j=0;j<size_of_constraint;j++) { 1735 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1736 } 1737 /* sort by global ordering if using lapack subroutines */ 1738 if (!skip_lapack || pcbddc->use_qr_single) { 1739 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1740 for (j=0;j<size_of_constraint;j++) { 1741 permutation[j]=j; 1742 } 1743 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1744 for (j=0;j<size_of_constraint;j++) { 1745 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1746 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1747 } 1748 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1749 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1750 } 1751 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1752 total_counts++; 1753 } 1754 for (k=0;k<nnsp_size;k++) { 1755 PetscReal real_value; 1756 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1757 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1758 for (j=0;j<size_of_constraint;j++) { 1759 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1760 } 1761 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1762 /* check if array is null on the connected component */ 1763 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1764 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1765 if (real_value > 0.0) { /* keep indices and values */ 1766 /* sort by global ordering if using lapack subroutines */ 1767 if (!skip_lapack || pcbddc->use_qr_single) { 1768 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1769 for (j=0;j<size_of_constraint;j++) { 1770 permutation[j]=j; 1771 } 1772 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1773 for (j=0;j<size_of_constraint;j++) { 1774 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1775 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1776 } 1777 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1778 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1779 } 1780 temp_constraints++; 1781 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1782 total_counts++; 1783 } 1784 } 1785 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1786 valid_constraints = temp_constraints; 1787 if (!pcbddc->use_nnsp_true && temp_constraints) { 1788 if (temp_constraints == 1) { /* just normalize the constraint */ 1789 PetscScalar norm; 1790 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1791 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1792 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1793 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1794 } else { /* perform SVD */ 1795 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1796 1797 #if defined(PETSC_MISSING_LAPACK_GESVD) 1798 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1799 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1800 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1801 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1802 from that computed using LAPACKgesvd 1803 -> This is due to a different computation of eigenvectors in LAPACKheev 1804 -> The quality of the POD-computed basis will be the same */ 1805 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1806 /* Store upper triangular part of correlation matrix */ 1807 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1808 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1809 for (j=0;j<temp_constraints;j++) { 1810 for (k=0;k<j+1;k++) { 1811 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1812 } 1813 } 1814 /* compute eigenvalues and eigenvectors of correlation matrix */ 1815 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1816 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1817 #if !defined(PETSC_USE_COMPLEX) 1818 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1819 #else 1820 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1821 #endif 1822 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1823 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1824 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1825 j = 0; 1826 while (j < temp_constraints && singular_vals[j] < tol) j++; 1827 total_counts = total_counts-j; 1828 valid_constraints = temp_constraints-j; 1829 /* scale and copy POD basis into used quadrature memory */ 1830 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1831 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1834 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1835 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1836 if (j<temp_constraints) { 1837 PetscInt ii; 1838 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1839 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1840 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1841 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1842 for (k=0;k<temp_constraints-j;k++) { 1843 for (ii=0;ii<size_of_constraint;ii++) { 1844 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1845 } 1846 } 1847 } 1848 #else /* on missing GESVD */ 1849 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1850 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1851 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1852 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1853 #if !defined(PETSC_USE_COMPLEX) 1854 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1855 #else 1856 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1857 #endif 1858 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1859 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1860 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1861 k = temp_constraints; 1862 if (k > size_of_constraint) k = size_of_constraint; 1863 j = 0; 1864 while (j < k && singular_vals[k-j-1] < tol) j++; 1865 valid_constraints = k-j; 1866 total_counts = total_counts-temp_constraints+valid_constraints; 1867 #endif /* on missing GESVD */ 1868 } 1869 } 1870 /* setting change_of_basis flag is safe now */ 1871 if (boolforchange) { 1872 for (j=0;j<valid_constraints;j++) { 1873 PetscBTSet(change_basis,total_counts-j-1); 1874 } 1875 } 1876 } 1877 /* free index sets of faces, edges and vertices */ 1878 for (i=0;i<n_ISForFaces;i++) { 1879 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1880 } 1881 if (n_ISForFaces) { 1882 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1883 } 1884 for (i=0;i<n_ISForEdges;i++) { 1885 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1886 } 1887 if (n_ISForEdges) { 1888 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1889 } 1890 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1891 /* map temp_indices_to_constraint in boundary numbering */ 1892 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1893 if (i != temp_indices[total_counts]) { 1894 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1895 } 1896 1897 /* free workspace */ 1898 if (!skip_lapack || pcbddc->use_qr_single) { 1899 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1900 } 1901 if (!skip_lapack) { 1902 ierr = PetscFree(work);CHKERRQ(ierr); 1903 #if defined(PETSC_USE_COMPLEX) 1904 ierr = PetscFree(rwork);CHKERRQ(ierr); 1905 #endif 1906 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1907 #if defined(PETSC_MISSING_LAPACK_GESVD) 1908 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1909 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1910 #endif 1911 } 1912 for (k=0;k<nnsp_size;k++) { 1913 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1914 } 1915 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1916 1917 /* set quantities in pcbddc data structure and store previous primal size */ 1918 /* n_vertices defines the number of subdomain corners in the primal space */ 1919 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1920 olocal_primal_size = pcbddc->local_primal_size; 1921 pcbddc->local_primal_size = total_counts; 1922 pcbddc->n_vertices = n_vertices; 1923 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1924 1925 /* Create constraint matrix */ 1926 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1927 /* so we need to set it up properly either with or without change of basis */ 1928 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1929 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1930 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1931 /* array to compute a local numbering of constraints : vertices first then constraints */ 1932 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1933 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1934 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1935 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1936 /* auxiliary stuff for basis change */ 1937 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1938 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1939 1940 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1941 total_primal_vertices=0; 1942 for (i=0;i<pcbddc->local_primal_size;i++) { 1943 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1944 if (size_of_constraint == 1) { 1945 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1946 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1947 aux_primal_minloc[total_primal_vertices]=0; 1948 total_primal_vertices++; 1949 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1950 PetscInt min_loc,min_index; 1951 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1952 /* find first untouched local node */ 1953 k = 0; 1954 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1955 min_index = global_indices[k]; 1956 min_loc = k; 1957 /* search the minimum among global nodes already untouched on the cc */ 1958 for (k=1;k<size_of_constraint;k++) { 1959 /* there can be more than one constraint on a single connected component */ 1960 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1961 min_index = global_indices[k]; 1962 min_loc = k; 1963 } 1964 } 1965 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1966 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1967 aux_primal_minloc[total_primal_vertices]=min_loc; 1968 total_primal_vertices++; 1969 } 1970 } 1971 /* determine if a QR strategy is needed for change of basis */ 1972 qr_needed = PETSC_FALSE; 1973 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1974 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1975 if (PetscBTLookup(change_basis,i)) { 1976 if (!pcbddc->use_qr_single) { 1977 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1978 j = 0; 1979 for (k=0;k<size_of_constraint;k++) { 1980 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1981 j++; 1982 } 1983 } 1984 /* found more than one primal dof on the cc */ 1985 if (j > 1) { 1986 PetscBTSet(qr_needed_idx,i); 1987 qr_needed = PETSC_TRUE; 1988 } 1989 } else { 1990 PetscBTSet(qr_needed_idx,i); 1991 qr_needed = PETSC_TRUE; 1992 } 1993 } 1994 } 1995 /* free workspace */ 1996 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1997 1998 /* permute indices in order to have a sorted set of vertices */ 1999 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2000 2001 /* nonzero structure of constraint matrix */ 2002 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2003 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2004 j=total_primal_vertices; 2005 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2006 if (!PetscBTLookup(change_basis,i)) { 2007 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2008 j++; 2009 } 2010 } 2011 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2012 ierr = PetscFree(nnz);CHKERRQ(ierr); 2013 /* set values in constraint matrix */ 2014 for (i=0;i<total_primal_vertices;i++) { 2015 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2016 } 2017 total_counts = total_primal_vertices; 2018 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2019 if (!PetscBTLookup(change_basis,i)) { 2020 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2021 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2022 total_counts++; 2023 } 2024 } 2025 /* assembling */ 2026 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2027 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 /* 2029 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2030 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2031 */ 2032 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2033 if (pcbddc->use_change_of_basis) { 2034 /* dual and primal dofs on a single cc */ 2035 PetscInt dual_dofs,primal_dofs; 2036 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2037 PetscInt primal_counter; 2038 /* working stuff for GEQRF */ 2039 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2040 PetscBLASInt lqr_work; 2041 /* working stuff for UNGQR */ 2042 PetscScalar *gqr_work,lgqr_work_t; 2043 PetscBLASInt lgqr_work; 2044 /* working stuff for TRTRS */ 2045 PetscScalar *trs_rhs; 2046 PetscBLASInt Blas_NRHS; 2047 /* pointers for values insertion into change of basis matrix */ 2048 PetscInt *start_rows,*start_cols; 2049 PetscScalar *start_vals; 2050 /* working stuff for values insertion */ 2051 PetscBT is_primal; 2052 /* matrix sizes */ 2053 PetscInt global_size,local_size; 2054 /* work array for nonzeros */ 2055 PetscScalar *nnz_array; 2056 /* temporary change of basis */ 2057 Mat localChangeOfBasisMatrix; 2058 /* auxiliary work for global change of basis */ 2059 Vec nnz_vec; 2060 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2061 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2062 PetscScalar *vals; 2063 PetscBool done; 2064 2065 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2066 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2067 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2068 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2069 2070 /* nonzeros for local mat */ 2071 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2072 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2073 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2074 if (PetscBTLookup(change_basis,i)) { 2075 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2076 if (PetscBTLookup(qr_needed_idx,i)) { 2077 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2078 } else { 2079 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2080 /* get local primal index on the cc */ 2081 j = 0; 2082 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2083 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2084 } 2085 } 2086 } 2087 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2088 /* Set initial identity in the matrix */ 2089 for (i=0;i<pcis->n_B;i++) { 2090 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2091 } 2092 2093 if (pcbddc->dbg_flag) { 2094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2096 } 2097 2098 2099 /* Now we loop on the constraints which need a change of basis */ 2100 /* 2101 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2102 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2103 2104 Basic blocks of change of basis matrix T computed by 2105 2106 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2107 2108 | 1 0 ... 0 s_1/S | 2109 | 0 1 ... 0 s_2/S | 2110 | ... | 2111 | 0 ... 1 s_{n-1}/S | 2112 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2113 2114 with S = \sum_{i=1}^n s_i^2 2115 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2116 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2117 2118 - QR decomposition of constraints otherwise 2119 */ 2120 if (qr_needed) { 2121 /* space to store Q */ 2122 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2123 /* first we issue queries for optimal work */ 2124 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2125 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2126 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2127 lqr_work = -1; 2128 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2129 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2130 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2131 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2132 lgqr_work = -1; 2133 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2134 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2137 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2138 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2139 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2140 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2141 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2142 /* array to store scaling factors for reflectors */ 2143 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2144 /* array to store rhs and solution of triangular solver */ 2145 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2146 /* allocating workspace for check */ 2147 if (pcbddc->dbg_flag) { 2148 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2149 } 2150 } 2151 /* array to store whether a node is primal or not */ 2152 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2153 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2154 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2155 if (i != total_primal_vertices) { 2156 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2157 } 2158 for (i=0;i<total_primal_vertices;i++) { 2159 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2160 } 2161 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2162 2163 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2164 /* -> using implicit ordering contained in temp_indices data */ 2165 total_counts = pcbddc->n_vertices; 2166 primal_counter = total_counts; 2167 while (total_counts<pcbddc->local_primal_size) { 2168 primal_dofs = 1; 2169 if (PetscBTLookup(change_basis,total_counts)) { 2170 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2171 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2172 primal_dofs++; 2173 } 2174 /* get constraint info */ 2175 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2176 dual_dofs = size_of_constraint-primal_dofs; 2177 2178 if (pcbddc->dbg_flag) { 2179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2180 } 2181 2182 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2183 2184 /* copy quadrature constraints for change of basis check */ 2185 if (pcbddc->dbg_flag) { 2186 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2187 } 2188 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2189 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2190 2191 /* compute QR decomposition of constraints */ 2192 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2193 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2194 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2195 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2196 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2197 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2199 2200 /* explictly compute R^-T */ 2201 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2202 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2203 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2204 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2205 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2206 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2207 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2208 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2209 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2210 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2211 2212 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2213 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2214 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2216 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2217 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2218 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2219 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2220 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2221 2222 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2223 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2224 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2225 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2226 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2227 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2228 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2229 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2230 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2231 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2232 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2233 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2234 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2235 2236 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2237 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2238 /* insert cols for primal dofs */ 2239 for (j=0;j<primal_dofs;j++) { 2240 start_vals = &qr_basis[j*size_of_constraint]; 2241 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2242 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2243 } 2244 /* insert cols for dual dofs */ 2245 for (j=0,k=0;j<dual_dofs;k++) { 2246 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2247 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2248 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2249 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2250 j++; 2251 } 2252 } 2253 2254 /* check change of basis */ 2255 if (pcbddc->dbg_flag) { 2256 PetscInt ii,jj; 2257 PetscBool valid_qr=PETSC_TRUE; 2258 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2259 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2260 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2261 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2262 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2263 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2264 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2265 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2266 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2267 for (jj=0;jj<size_of_constraint;jj++) { 2268 for (ii=0;ii<primal_dofs;ii++) { 2269 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2270 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2271 } 2272 } 2273 if (!valid_qr) { 2274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2275 for (jj=0;jj<size_of_constraint;jj++) { 2276 for (ii=0;ii<primal_dofs;ii++) { 2277 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2279 } 2280 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2281 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2282 } 2283 } 2284 } 2285 } else { 2286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2287 } 2288 } 2289 } else { /* simple transformation block */ 2290 PetscInt row,col; 2291 PetscScalar val,norm; 2292 2293 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2294 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2295 for (j=0;j<size_of_constraint;j++) { 2296 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2297 if (!PetscBTLookup(is_primal,row)) { 2298 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2299 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2300 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2301 } else { 2302 for (k=0;k<size_of_constraint;k++) { 2303 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2304 if (row != col) { 2305 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2306 } else { 2307 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2308 } 2309 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2310 } 2311 } 2312 } 2313 if (pcbddc->dbg_flag) { 2314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2315 } 2316 } 2317 /* increment primal counter */ 2318 primal_counter += primal_dofs; 2319 } else { 2320 if (pcbddc->dbg_flag) { 2321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2322 } 2323 } 2324 /* increment constraint counter total_counts */ 2325 total_counts += primal_dofs; 2326 } 2327 2328 /* free workspace */ 2329 if (qr_needed) { 2330 if (pcbddc->dbg_flag) { 2331 ierr = PetscFree(work);CHKERRQ(ierr); 2332 } 2333 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2334 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2335 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2336 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2337 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2338 } 2339 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2340 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 2343 /* assembling of global change of variable */ 2344 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2345 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2346 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2347 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2348 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2349 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2350 2351 /* nonzeros (overestimated) */ 2352 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2353 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2354 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2355 for (i=0;i<pcis->n;i++) { 2356 nnz_array[i] = 1.0; 2357 idxs_all[i] = i; 2358 } 2359 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2360 for (i=0;i<pcis->n_B;i++) { 2361 nnz_array[idxs_B[i]] = nnz[i]; 2362 } 2363 if (pcis->n) { 2364 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2365 } 2366 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2367 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2368 ierr = PetscFree(nnz);CHKERRQ(ierr); 2369 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2370 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2371 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2372 for (i=0;i<local_size;i++) { 2373 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2374 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2375 } 2376 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2377 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2378 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2379 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2380 2381 /* Set identity on dirichlet dofs */ 2382 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2383 for (i=0;i<pcis->n-pcis->n_B;i++) { 2384 PetscScalar one=1.0; 2385 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2386 } 2387 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2388 2389 /* Set values at interface dofs */ 2390 done = PETSC_TRUE; 2391 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2392 if (!done) { 2393 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2394 } 2395 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2396 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2397 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2398 for (i=0;i<nvtxs;i++) { 2399 PetscInt row,*cols,ncols; 2400 PetscScalar *mat_vals; 2401 2402 row = idxs_B[i]; 2403 ncols = xadj[i+1]-xadj[i]; 2404 cols = idxs_mapped+xadj[i]; 2405 mat_vals = vals+xadj[i]; 2406 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2407 } 2408 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2409 if (!done) { 2410 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2411 } 2412 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2413 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2414 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2415 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2416 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2417 2418 /* check */ 2419 if (pcbddc->dbg_flag) { 2420 PetscReal error; 2421 Vec x,x_change; 2422 2423 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2424 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2425 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2426 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2427 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2428 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2429 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2430 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2431 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2432 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2433 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2434 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2436 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2437 ierr = VecDestroy(&x);CHKERRQ(ierr); 2438 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2439 } 2440 2441 /* adapt sub_schurs computed (if any) */ 2442 if (pcbddc->use_deluxe_scaling) { 2443 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2444 if (sub_schurs->n_subs_par_g) { 2445 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2446 } 2447 if (sub_schurs->S_Ej_all) { 2448 Mat S_1,S_2,tmat; 2449 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,sub_schurs->is_Ej_all,sub_schurs->is_Ej_all,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 2450 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2451 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2452 sub_schurs->S_Ej_all = S_1; 2453 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2454 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2455 sub_schurs->sum_S_Ej_all = S_2; 2456 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2457 } 2458 } 2459 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2460 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2461 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2462 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2463 } 2464 2465 /* set up change of basis context */ 2466 if (pcbddc->ChangeOfBasisMatrix) { 2467 PCBDDCChange_ctx change_ctx; 2468 2469 if (!pcbddc->new_global_mat) { 2470 PetscInt global_size,local_size; 2471 2472 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2473 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2474 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2475 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2476 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2477 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2478 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2479 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2480 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2481 } else { 2482 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2483 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2484 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2485 } 2486 if (!pcbddc->user_ChangeOfBasisMatrix) { 2487 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2488 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2489 } else { 2490 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2491 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2492 } 2493 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2494 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2495 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2496 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 } 2498 2499 /* get indices in local ordering for vertices and constraints */ 2500 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2501 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2502 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2503 } 2504 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2505 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2506 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2507 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2508 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2509 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2510 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2511 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2512 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2513 /* set quantities in PCBDDC data struct */ 2514 pcbddc->n_actual_vertices = i; 2515 /* check if a new primal space has been introduced */ 2516 pcbddc->new_primal_space_local = PETSC_TRUE; 2517 if (olocal_primal_size == pcbddc->local_primal_size) { 2518 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2519 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2520 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2521 } 2522 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2523 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2524 2525 /* flush dbg viewer */ 2526 if (pcbddc->dbg_flag) { 2527 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2528 } 2529 2530 /* free workspace */ 2531 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2532 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2533 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2534 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2535 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2536 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2542 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2543 { 2544 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2545 PC_IS *pcis = (PC_IS*)pc->data; 2546 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2547 PetscInt ierr,i,vertex_size; 2548 PetscViewer viewer=pcbddc->dbg_viewer; 2549 2550 PetscFunctionBegin; 2551 /* Reset previously computed graph */ 2552 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2553 /* Init local Graph struct */ 2554 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2555 2556 /* Check validity of the csr graph passed in by the user */ 2557 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2558 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2559 } 2560 2561 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2562 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2563 Mat mat_adj; 2564 PetscInt *xadj,*adjncy; 2565 PetscInt nvtxs; 2566 PetscBool flg_row=PETSC_TRUE; 2567 2568 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2569 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2570 if (!flg_row) { 2571 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2572 } 2573 if (pcbddc->use_local_adj) { 2574 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2575 pcbddc->computed_rowadj = PETSC_TRUE; 2576 } else { /* just compute subdomain's connected components */ 2577 IS is_dummy; 2578 ISLocalToGlobalMapping l2gmap_dummy; 2579 PetscInt j,sum; 2580 PetscInt *cxadj,*cadjncy; 2581 const PetscInt *idxs; 2582 PCBDDCGraph graph; 2583 PetscBT is_on_boundary; 2584 2585 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2586 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2587 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2588 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2589 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2590 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2591 graph->xadj = xadj; 2592 graph->adjncy = adjncy; 2593 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2594 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2595 2596 if (pcbddc->dbg_flag) { 2597 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2598 for (i=0;i<graph->ncc;i++) { 2599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2600 } 2601 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2602 } 2603 2604 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2605 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2606 for (i=0;i<pcis->n_B;i++) { 2607 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2608 } 2609 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2610 2611 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2612 sum = 0; 2613 for (i=0;i<graph->ncc;i++) { 2614 PetscInt sizecc = 0; 2615 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2616 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2617 sizecc++; 2618 } 2619 } 2620 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2621 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2622 cxadj[graph->queue[j]] = sizecc; 2623 } 2624 } 2625 sum += sizecc*sizecc; 2626 } 2627 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2628 sum = 0; 2629 for (i=0;i<nvtxs;i++) { 2630 PetscInt temp = cxadj[i]; 2631 cxadj[i] = sum; 2632 sum += temp; 2633 } 2634 cxadj[nvtxs] = sum; 2635 for (i=0;i<graph->ncc;i++) { 2636 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2637 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2638 PetscInt k,sizecc = 0; 2639 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2640 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2641 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2642 sizecc++; 2643 } 2644 } 2645 } 2646 } 2647 } 2648 if (nvtxs) { 2649 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2650 } else { 2651 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2652 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2653 } 2654 graph->xadj = 0; 2655 graph->adjncy = 0; 2656 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2657 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2658 } 2659 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2660 if (!flg_row) { 2661 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2662 } 2663 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2664 } 2665 2666 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2667 vertex_size = 1; 2668 if (pcbddc->user_provided_isfordofs) { 2669 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2670 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2671 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2672 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2673 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2674 } 2675 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2676 pcbddc->n_ISForDofs = 0; 2677 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2678 } 2679 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2680 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2681 } else { 2682 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2683 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2684 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2685 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2686 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2687 } 2688 } 2689 } 2690 2691 /* Setup of Graph */ 2692 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2693 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2694 } 2695 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2696 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2697 } 2698 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2699 2700 /* Graph's connected components analysis */ 2701 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2702 2703 /* print some info to stdout */ 2704 if (pcbddc->dbg_flag) { 2705 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2706 } 2707 2708 /* mark topography has done */ 2709 pcbddc->recompute_topography = PETSC_FALSE; 2710 PetscFunctionReturn(0); 2711 } 2712 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2715 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2716 { 2717 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2718 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2719 PetscErrorCode ierr; 2720 2721 PetscFunctionBegin; 2722 n = 0; 2723 vertices = 0; 2724 if (pcbddc->ConstraintMatrix) { 2725 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2726 for (i=0;i<local_primal_size;i++) { 2727 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2728 if (size_of_constraint == 1) n++; 2729 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2730 } 2731 if (vertices_idx) { 2732 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2733 n = 0; 2734 for (i=0;i<local_primal_size;i++) { 2735 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2736 if (size_of_constraint == 1) { 2737 vertices[n++]=row_cmat_indices[0]; 2738 } 2739 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2740 } 2741 } 2742 } 2743 *n_vertices = n; 2744 if (vertices_idx) *vertices_idx = vertices; 2745 PetscFunctionReturn(0); 2746 } 2747 2748 #undef __FUNCT__ 2749 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2750 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2751 { 2752 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2753 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2754 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2755 PetscBT touched; 2756 PetscErrorCode ierr; 2757 2758 /* This function assumes that the number of local constraints per connected component 2759 is not greater than the number of nodes defined for the connected component 2760 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2761 PetscFunctionBegin; 2762 n = 0; 2763 constraints_index = 0; 2764 if (pcbddc->ConstraintMatrix) { 2765 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2766 max_size_of_constraint = 0; 2767 for (i=0;i<local_primal_size;i++) { 2768 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2769 if (size_of_constraint > 1) { 2770 n++; 2771 } 2772 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2773 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2774 } 2775 if (constraints_idx) { 2776 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2777 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2778 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2779 n = 0; 2780 for (i=0;i<local_primal_size;i++) { 2781 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2782 if (size_of_constraint > 1) { 2783 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2784 /* find first untouched local node */ 2785 j = 0; 2786 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2787 min_index = row_cmat_global_indices[j]; 2788 min_loc = j; 2789 /* search the minimum among nodes not yet touched on the connected component 2790 since there can be more than one constraint on a single cc */ 2791 for (j=1;j<size_of_constraint;j++) { 2792 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2793 min_index = row_cmat_global_indices[j]; 2794 min_loc = j; 2795 } 2796 } 2797 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2798 constraints_index[n++] = row_cmat_indices[min_loc]; 2799 } 2800 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2801 } 2802 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2803 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2804 } 2805 } 2806 *n_constraints = n; 2807 if (constraints_idx) *constraints_idx = constraints_index; 2808 PetscFunctionReturn(0); 2809 } 2810 2811 #undef __FUNCT__ 2812 #define __FUNCT__ "PCBDDCSubsetNumbering" 2813 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2814 { 2815 Vec local_vec,global_vec; 2816 IS seqis,paris; 2817 VecScatter scatter_ctx; 2818 PetscScalar *array; 2819 PetscInt *temp_global_dofs; 2820 PetscScalar globalsum; 2821 PetscInt i,j,s; 2822 PetscInt nlocals,first_index,old_index,max_local; 2823 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2824 PetscMPIInt *dof_sizes,*dof_displs; 2825 PetscBool first_found; 2826 PetscErrorCode ierr; 2827 2828 PetscFunctionBegin; 2829 /* mpi buffers */ 2830 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2831 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2832 j = ( !rank_prec_comm ? size_prec_comm : 0); 2833 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2834 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2835 /* get maximum size of subset */ 2836 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2837 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2838 max_local = 0; 2839 for (i=0;i<n_local_dofs;i++) { 2840 if (max_local < temp_global_dofs[i] ) { 2841 max_local = temp_global_dofs[i]; 2842 } 2843 } 2844 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2845 max_global++; 2846 max_local = 0; 2847 for (i=0;i<n_local_dofs;i++) { 2848 if (max_local < local_dofs[i] ) { 2849 max_local = local_dofs[i]; 2850 } 2851 } 2852 max_local++; 2853 /* allocate workspace */ 2854 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2855 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2856 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2857 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2858 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2859 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2860 /* create scatter */ 2861 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2862 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2863 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2864 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2865 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2866 /* init array */ 2867 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2868 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2869 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2870 if (local_dofs_mult) { 2871 for (i=0;i<n_local_dofs;i++) { 2872 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2873 } 2874 } else { 2875 for (i=0;i<n_local_dofs;i++) { 2876 array[local_dofs[i]]=1.0; 2877 } 2878 } 2879 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2880 /* scatter into global vec and get total number of global dofs */ 2881 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2882 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2883 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2884 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2885 /* Fill global_vec with cumulative function for global numbering */ 2886 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2887 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2888 nlocals = 0; 2889 first_index = -1; 2890 first_found = PETSC_FALSE; 2891 for (i=0;i<s;i++) { 2892 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2893 first_found = PETSC_TRUE; 2894 first_index = i; 2895 } 2896 nlocals += (PetscInt)PetscRealPart(array[i]); 2897 } 2898 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2899 if (!rank_prec_comm) { 2900 dof_displs[0]=0; 2901 for (i=1;i<size_prec_comm;i++) { 2902 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2903 } 2904 } 2905 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2906 if (first_found) { 2907 array[first_index] += (PetscScalar)nlocals; 2908 old_index = first_index; 2909 for (i=first_index+1;i<s;i++) { 2910 if (PetscRealPart(array[i]) > 0.1) { 2911 array[i] += array[old_index]; 2912 old_index = i; 2913 } 2914 } 2915 } 2916 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2917 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2918 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2919 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2920 /* get global ordering of local dofs */ 2921 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2922 if (local_dofs_mult) { 2923 for (i=0;i<n_local_dofs;i++) { 2924 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2925 } 2926 } else { 2927 for (i=0;i<n_local_dofs;i++) { 2928 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2929 } 2930 } 2931 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2932 /* free workspace */ 2933 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2934 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2935 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2936 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2937 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2938 /* return pointer to global ordering of local dofs */ 2939 *global_numbering_subset = temp_global_dofs; 2940 PetscFunctionReturn(0); 2941 } 2942 2943 #undef __FUNCT__ 2944 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2945 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2946 { 2947 PetscInt i,j; 2948 PetscScalar *alphas; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 /* this implements stabilized Gram-Schmidt */ 2953 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2954 for (i=0;i<n;i++) { 2955 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2956 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2957 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2958 } 2959 ierr = PetscFree(alphas);CHKERRQ(ierr); 2960 PetscFunctionReturn(0); 2961 } 2962 2963 #undef __FUNCT__ 2964 #define __FUNCT__ "MatISGetSubassemblingPattern" 2965 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2966 { 2967 Mat subdomain_adj; 2968 IS new_ranks,ranks_send_to; 2969 MatPartitioning partitioner; 2970 Mat_IS *matis; 2971 PetscInt n_neighs,*neighs,*n_shared,**shared; 2972 PetscInt prank; 2973 PetscMPIInt size,rank,color; 2974 PetscInt *xadj,*adjncy,*oldranks; 2975 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2976 PetscInt i,local_size,threshold=0; 2977 PetscErrorCode ierr; 2978 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2979 PetscSubcomm subcomm; 2980 2981 PetscFunctionBegin; 2982 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2983 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2984 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2985 2986 /* Get info on mapping */ 2987 matis = (Mat_IS*)(mat->data); 2988 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2989 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2990 2991 /* build local CSR graph of subdomains' connectivity */ 2992 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2993 xadj[0] = 0; 2994 xadj[1] = PetscMax(n_neighs-1,0); 2995 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2996 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2997 2998 if (threshold) { 2999 PetscInt xadj_count = 0; 3000 for (i=1;i<n_neighs;i++) { 3001 if (n_shared[i] > threshold) { 3002 adjncy[xadj_count] = neighs[i]; 3003 adjncy_wgt[xadj_count] = n_shared[i]; 3004 xadj_count++; 3005 } 3006 } 3007 xadj[1] = xadj_count; 3008 } else { 3009 if (xadj[1]) { 3010 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3011 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3012 } 3013 } 3014 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3015 if (use_square) { 3016 for (i=0;i<xadj[1];i++) { 3017 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3018 } 3019 } 3020 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3021 3022 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3023 3024 /* 3025 Restrict work on active processes only. 3026 */ 3027 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3028 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3029 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3030 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3031 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3032 if (color) { 3033 ierr = PetscFree(xadj);CHKERRQ(ierr); 3034 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3035 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3036 } else { 3037 PetscInt coarsening_ratio; 3038 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3039 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3040 prank = rank; 3041 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3042 /* 3043 for (i=0;i<size;i++) { 3044 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3045 } 3046 */ 3047 for (i=0;i<xadj[1];i++) { 3048 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3049 } 3050 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3051 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3052 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3053 3054 /* Partition */ 3055 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3056 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3057 if (use_vwgt) { 3058 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3059 v_wgt[0] = local_size; 3060 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3061 } 3062 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3063 coarsening_ratio = size/n_subdomains; 3064 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3065 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3066 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3067 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3068 3069 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3070 if (contiguous) { 3071 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3072 } else { 3073 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3074 } 3075 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3076 /* clean up */ 3077 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3078 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3079 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3080 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3081 } 3082 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3083 3084 /* assemble parallel IS for sends */ 3085 i = 1; 3086 if (color) i=0; 3087 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3088 3089 /* get back IS */ 3090 *is_sends = ranks_send_to; 3091 PetscFunctionReturn(0); 3092 } 3093 3094 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3095 3096 #undef __FUNCT__ 3097 #define __FUNCT__ "MatISSubassemble" 3098 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3099 { 3100 Mat local_mat; 3101 Mat_IS *matis; 3102 IS is_sends_internal; 3103 PetscInt rows,cols,new_local_rows; 3104 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3105 PetscBool ismatis,isdense,newisdense,destroy_mat; 3106 ISLocalToGlobalMapping l2gmap; 3107 PetscInt* l2gmap_indices; 3108 const PetscInt* is_indices; 3109 MatType new_local_type; 3110 /* buffers */ 3111 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3112 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3113 PetscInt *recv_buffer_idxs_local; 3114 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3115 /* MPI */ 3116 MPI_Comm comm,comm_n; 3117 PetscSubcomm subcomm; 3118 PetscMPIInt n_sends,n_recvs,commsize; 3119 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3120 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3121 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3122 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3123 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3124 PetscErrorCode ierr; 3125 3126 PetscFunctionBegin; 3127 /* TODO: add missing checks */ 3128 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3129 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3130 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3131 PetscValidLogicalCollectiveInt(mat,nis,7); 3132 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3133 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3134 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3135 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3136 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3137 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3138 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3139 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3140 PetscInt mrows,mcols,mnrows,mncols; 3141 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3142 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3143 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3144 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3145 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3146 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3147 } 3148 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3149 PetscValidLogicalCollectiveInt(mat,bs,0); 3150 /* prepare IS for sending if not provided */ 3151 if (!is_sends) { 3152 PetscBool pcontig = PETSC_TRUE; 3153 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3154 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3155 } else { 3156 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3157 is_sends_internal = is_sends; 3158 } 3159 3160 /* get pointer of MATIS data */ 3161 matis = (Mat_IS*)mat->data; 3162 3163 /* get comm */ 3164 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3165 3166 /* compute number of sends */ 3167 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3168 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3169 3170 /* compute number of receives */ 3171 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3172 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3173 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3174 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3175 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3176 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3177 ierr = PetscFree(iflags);CHKERRQ(ierr); 3178 3179 /* restrict comm if requested */ 3180 subcomm = 0; 3181 destroy_mat = PETSC_FALSE; 3182 if (restrict_comm) { 3183 PetscMPIInt color,rank,subcommsize; 3184 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3185 color = 0; 3186 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3187 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3188 subcommsize = commsize - subcommsize; 3189 /* check if reuse has been requested */ 3190 if (reuse == MAT_REUSE_MATRIX) { 3191 if (*mat_n) { 3192 PetscMPIInt subcommsize2; 3193 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3194 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3195 comm_n = PetscObjectComm((PetscObject)*mat_n); 3196 } else { 3197 comm_n = PETSC_COMM_SELF; 3198 } 3199 } else { /* MAT_INITIAL_MATRIX */ 3200 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3201 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3202 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3203 comm_n = subcomm->comm; 3204 } 3205 /* flag to destroy *mat_n if not significative */ 3206 if (color) destroy_mat = PETSC_TRUE; 3207 } else { 3208 comm_n = comm; 3209 } 3210 3211 /* prepare send/receive buffers */ 3212 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3213 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3214 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3215 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3216 if (nis) { 3217 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3218 } 3219 3220 /* Get data from local matrices */ 3221 if (!isdense) { 3222 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3223 /* TODO: See below some guidelines on how to prepare the local buffers */ 3224 /* 3225 send_buffer_vals should contain the raw values of the local matrix 3226 send_buffer_idxs should contain: 3227 - MatType_PRIVATE type 3228 - PetscInt size_of_l2gmap 3229 - PetscInt global_row_indices[size_of_l2gmap] 3230 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3231 */ 3232 } else { 3233 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3234 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3235 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3236 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3237 send_buffer_idxs[1] = i; 3238 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3239 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3240 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3241 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3242 for (i=0;i<n_sends;i++) { 3243 ilengths_vals[is_indices[i]] = len*len; 3244 ilengths_idxs[is_indices[i]] = len+2; 3245 } 3246 } 3247 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3248 /* additional is (if any) */ 3249 if (nis) { 3250 PetscMPIInt psum; 3251 PetscInt j; 3252 for (j=0,psum=0;j<nis;j++) { 3253 PetscInt plen; 3254 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3255 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3256 psum += len+1; /* indices + lenght */ 3257 } 3258 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3259 for (j=0,psum=0;j<nis;j++) { 3260 PetscInt plen; 3261 const PetscInt *is_array_idxs; 3262 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3263 send_buffer_idxs_is[psum] = plen; 3264 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3265 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3266 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3267 psum += plen+1; /* indices + lenght */ 3268 } 3269 for (i=0;i<n_sends;i++) { 3270 ilengths_idxs_is[is_indices[i]] = psum; 3271 } 3272 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3273 } 3274 3275 buf_size_idxs = 0; 3276 buf_size_vals = 0; 3277 buf_size_idxs_is = 0; 3278 for (i=0;i<n_recvs;i++) { 3279 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3280 buf_size_vals += (PetscInt)olengths_vals[i]; 3281 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3282 } 3283 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3284 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3285 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3286 3287 /* get new tags for clean communications */ 3288 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3289 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3290 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3291 3292 /* allocate for requests */ 3293 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3294 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3295 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3296 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3297 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3298 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3299 3300 /* communications */ 3301 ptr_idxs = recv_buffer_idxs; 3302 ptr_vals = recv_buffer_vals; 3303 ptr_idxs_is = recv_buffer_idxs_is; 3304 for (i=0;i<n_recvs;i++) { 3305 source_dest = onodes[i]; 3306 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3307 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3308 ptr_idxs += olengths_idxs[i]; 3309 ptr_vals += olengths_vals[i]; 3310 if (nis) { 3311 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3312 ptr_idxs_is += olengths_idxs_is[i]; 3313 } 3314 } 3315 for (i=0;i<n_sends;i++) { 3316 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3317 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3318 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3319 if (nis) { 3320 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3321 } 3322 } 3323 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3324 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3325 3326 /* assemble new l2g map */ 3327 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3328 ptr_idxs = recv_buffer_idxs; 3329 new_local_rows = 0; 3330 for (i=0;i<n_recvs;i++) { 3331 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3332 ptr_idxs += olengths_idxs[i]; 3333 } 3334 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3335 ptr_idxs = recv_buffer_idxs; 3336 new_local_rows = 0; 3337 for (i=0;i<n_recvs;i++) { 3338 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3339 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3340 ptr_idxs += olengths_idxs[i]; 3341 } 3342 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3343 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3344 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3345 3346 /* infer new local matrix type from received local matrices type */ 3347 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3348 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3349 if (n_recvs) { 3350 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3351 ptr_idxs = recv_buffer_idxs; 3352 for (i=0;i<n_recvs;i++) { 3353 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3354 new_local_type_private = MATAIJ_PRIVATE; 3355 break; 3356 } 3357 ptr_idxs += olengths_idxs[i]; 3358 } 3359 switch (new_local_type_private) { 3360 case MATDENSE_PRIVATE: 3361 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3362 new_local_type = MATSEQAIJ; 3363 bs = 1; 3364 } else { /* if I receive only 1 dense matrix */ 3365 new_local_type = MATSEQDENSE; 3366 bs = 1; 3367 } 3368 break; 3369 case MATAIJ_PRIVATE: 3370 new_local_type = MATSEQAIJ; 3371 bs = 1; 3372 break; 3373 case MATBAIJ_PRIVATE: 3374 new_local_type = MATSEQBAIJ; 3375 break; 3376 case MATSBAIJ_PRIVATE: 3377 new_local_type = MATSEQSBAIJ; 3378 break; 3379 default: 3380 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3381 break; 3382 } 3383 } else { /* by default, new_local_type is seqdense */ 3384 new_local_type = MATSEQDENSE; 3385 bs = 1; 3386 } 3387 3388 /* create MATIS object if needed */ 3389 if (reuse == MAT_INITIAL_MATRIX) { 3390 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3391 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3392 } else { 3393 /* it also destroys the local matrices */ 3394 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3395 } 3396 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3397 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3398 3399 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3400 3401 /* Global to local map of received indices */ 3402 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3403 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3404 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3405 3406 /* restore attributes -> type of incoming data and its size */ 3407 buf_size_idxs = 0; 3408 for (i=0;i<n_recvs;i++) { 3409 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3410 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3411 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3412 } 3413 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3414 3415 /* set preallocation */ 3416 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3417 if (!newisdense) { 3418 PetscInt *new_local_nnz=0; 3419 3420 ptr_vals = recv_buffer_vals; 3421 ptr_idxs = recv_buffer_idxs_local; 3422 if (n_recvs) { 3423 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3424 } 3425 for (i=0;i<n_recvs;i++) { 3426 PetscInt j; 3427 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3428 for (j=0;j<*(ptr_idxs+1);j++) { 3429 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3430 } 3431 } else { 3432 /* TODO */ 3433 } 3434 ptr_idxs += olengths_idxs[i]; 3435 } 3436 if (new_local_nnz) { 3437 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3438 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3439 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3440 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3441 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3442 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3443 } else { 3444 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3445 } 3446 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3447 } else { 3448 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3449 } 3450 3451 /* set values */ 3452 ptr_vals = recv_buffer_vals; 3453 ptr_idxs = recv_buffer_idxs_local; 3454 for (i=0;i<n_recvs;i++) { 3455 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3456 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3457 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3458 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3459 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3460 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3461 } else { 3462 /* TODO */ 3463 } 3464 ptr_idxs += olengths_idxs[i]; 3465 ptr_vals += olengths_vals[i]; 3466 } 3467 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3468 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3469 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3470 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3471 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3472 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3473 3474 #if 0 3475 if (!restrict_comm) { /* check */ 3476 Vec lvec,rvec; 3477 PetscReal infty_error; 3478 3479 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3480 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3481 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3482 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3483 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3484 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3485 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3486 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3487 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3488 } 3489 #endif 3490 3491 /* assemble new additional is (if any) */ 3492 if (nis) { 3493 PetscInt **temp_idxs,*count_is,j,psum; 3494 3495 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3496 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3497 ptr_idxs = recv_buffer_idxs_is; 3498 psum = 0; 3499 for (i=0;i<n_recvs;i++) { 3500 for (j=0;j<nis;j++) { 3501 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3502 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3503 psum += plen; 3504 ptr_idxs += plen+1; /* shift pointer to received data */ 3505 } 3506 } 3507 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3508 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3509 for (i=1;i<nis;i++) { 3510 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3511 } 3512 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3513 ptr_idxs = recv_buffer_idxs_is; 3514 for (i=0;i<n_recvs;i++) { 3515 for (j=0;j<nis;j++) { 3516 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3517 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3518 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3519 ptr_idxs += plen+1; /* shift pointer to received data */ 3520 } 3521 } 3522 for (i=0;i<nis;i++) { 3523 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3524 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3525 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3526 } 3527 ierr = PetscFree(count_is);CHKERRQ(ierr); 3528 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3529 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3530 } 3531 /* free workspace */ 3532 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3533 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3534 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3535 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3536 if (isdense) { 3537 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3538 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3539 } else { 3540 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3541 } 3542 if (nis) { 3543 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3544 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3545 } 3546 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3547 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3548 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3549 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3550 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3551 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3552 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3553 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3554 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3555 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3556 ierr = PetscFree(onodes);CHKERRQ(ierr); 3557 if (nis) { 3558 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3559 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3560 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3561 } 3562 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3563 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3564 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3565 for (i=0;i<nis;i++) { 3566 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3567 } 3568 } 3569 PetscFunctionReturn(0); 3570 } 3571 3572 /* temporary hack into ksp private data structure */ 3573 #include <petsc-private/kspimpl.h> 3574 3575 #undef __FUNCT__ 3576 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3577 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3578 { 3579 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3580 PC_IS *pcis = (PC_IS*)pc->data; 3581 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3582 MatNullSpace CoarseNullSpace=NULL; 3583 ISLocalToGlobalMapping coarse_islg; 3584 IS coarse_is,*isarray; 3585 PetscInt i,im_active=-1,active_procs=-1; 3586 PetscInt nis,nisdofs,nisneu; 3587 PC pc_temp; 3588 PCType coarse_pc_type; 3589 KSPType coarse_ksp_type; 3590 PetscBool multilevel_requested,multilevel_allowed; 3591 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3592 Mat t_coarse_mat_is; 3593 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3594 PetscMPIInt all_procs; 3595 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3596 PetscBool compute_vecs = PETSC_FALSE; 3597 PetscScalar *array; 3598 PetscErrorCode ierr; 3599 3600 PetscFunctionBegin; 3601 /* Assign global numbering to coarse dofs */ 3602 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3603 compute_vecs = PETSC_TRUE; 3604 PetscInt ocoarse_size; 3605 ocoarse_size = pcbddc->coarse_size; 3606 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3607 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3608 /* see if we can avoid some work */ 3609 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3610 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3611 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3612 coarse_reuse = PETSC_FALSE; 3613 } else { /* we can safely reuse already computed coarse matrix */ 3614 coarse_reuse = PETSC_TRUE; 3615 } 3616 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3617 coarse_reuse = PETSC_FALSE; 3618 } 3619 /* reset any subassembling information */ 3620 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3621 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3622 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3623 coarse_reuse = PETSC_TRUE; 3624 } 3625 3626 /* count "active" (i.e. with positive local size) and "void" processes */ 3627 im_active = !!(pcis->n); 3628 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3629 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3630 void_procs = all_procs-active_procs; 3631 csin_type_simple = PETSC_TRUE; 3632 redist = PETSC_FALSE; 3633 if (pcbddc->current_level && void_procs) { 3634 csin_ml = PETSC_TRUE; 3635 ncoarse_ml = void_procs; 3636 csin_ds = PETSC_TRUE; 3637 ncoarse_ds = void_procs; 3638 } else { 3639 csin_ml = PETSC_FALSE; 3640 ncoarse_ml = all_procs; 3641 if (void_procs) { 3642 csin_ds = PETSC_TRUE; 3643 ncoarse_ds = void_procs; 3644 csin_type_simple = PETSC_FALSE; 3645 } else { 3646 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3647 csin_ds = PETSC_TRUE; 3648 ncoarse_ds = pcbddc->redistribute_coarse; 3649 redist = PETSC_TRUE; 3650 } else { 3651 csin_ds = PETSC_FALSE; 3652 ncoarse_ds = all_procs; 3653 } 3654 } 3655 } 3656 3657 /* 3658 test if we can go multilevel: three conditions must be satisfied: 3659 - we have not exceeded the number of levels requested 3660 - we can actually subassemble the active processes 3661 - we can find a suitable number of MPI processes where we can place the subassembled problem 3662 */ 3663 multilevel_allowed = PETSC_FALSE; 3664 multilevel_requested = PETSC_FALSE; 3665 if (pcbddc->current_level < pcbddc->max_levels) { 3666 multilevel_requested = PETSC_TRUE; 3667 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3668 multilevel_allowed = PETSC_FALSE; 3669 } else { 3670 multilevel_allowed = PETSC_TRUE; 3671 } 3672 } 3673 /* determine number of process partecipating to coarse solver */ 3674 if (multilevel_allowed) { 3675 ncoarse = ncoarse_ml; 3676 csin = csin_ml; 3677 } else { 3678 ncoarse = ncoarse_ds; 3679 csin = csin_ds; 3680 } 3681 3682 /* creates temporary l2gmap and IS for coarse indexes */ 3683 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3684 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3685 3686 /* creates temporary MATIS object for coarse matrix */ 3687 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3688 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3689 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3690 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3691 #if 0 3692 { 3693 PetscViewer viewer; 3694 char filename[256]; 3695 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3696 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3697 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3698 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3699 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3700 } 3701 #endif 3702 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3703 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3704 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3705 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3706 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3707 3708 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3709 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3710 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3711 const PetscInt *idxs; 3712 ISLocalToGlobalMapping tmap; 3713 3714 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3715 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3716 /* allocate space for temporary storage */ 3717 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3718 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3719 /* allocate for IS array */ 3720 nisdofs = pcbddc->n_ISForDofsLocal; 3721 nisneu = !!pcbddc->NeumannBoundariesLocal; 3722 nis = nisdofs + nisneu; 3723 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3724 /* dofs splitting */ 3725 for (i=0;i<nisdofs;i++) { 3726 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3727 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3728 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3729 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3730 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3731 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3732 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3733 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3734 } 3735 /* neumann boundaries */ 3736 if (pcbddc->NeumannBoundariesLocal) { 3737 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3738 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3739 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3740 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3741 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3742 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3743 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3744 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3745 } 3746 /* free memory */ 3747 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3748 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3749 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3750 } else { 3751 nis = 0; 3752 nisdofs = 0; 3753 nisneu = 0; 3754 isarray = NULL; 3755 } 3756 /* destroy no longer needed map */ 3757 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3758 3759 /* restrict on coarse candidates (if needed) */ 3760 coarse_mat_is = NULL; 3761 if (csin) { 3762 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3763 if (redist) { 3764 PetscMPIInt rank; 3765 PetscInt spc,n_spc_p1,dest[1]; 3766 3767 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3768 spc = all_procs/pcbddc->redistribute_coarse; 3769 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3770 if (rank > n_spc_p1*(spc+1)-1) { 3771 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3772 } else { 3773 dest[0] = rank/(spc+1); 3774 } 3775 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3776 } else { 3777 PetscInt j,tissize,*nisindices; 3778 PetscInt *coarse_candidates; 3779 const PetscInt* tisindices; 3780 /* get coarse candidates' ranks in pc communicator */ 3781 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3782 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3783 for (i=0,j=0;i<all_procs;i++) { 3784 if (!coarse_candidates[i]) { 3785 coarse_candidates[j]=i; 3786 j++; 3787 } 3788 } 3789 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3790 /* get a suitable subassembling pattern */ 3791 if (csin_type_simple) { 3792 PetscMPIInt rank; 3793 PetscInt issize,isidx; 3794 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3795 if (im_active) { 3796 issize = 1; 3797 isidx = (PetscInt)rank; 3798 } else { 3799 issize = 0; 3800 isidx = -1; 3801 } 3802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3803 } else { 3804 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3805 } 3806 if (pcbddc->dbg_flag) { 3807 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3808 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3809 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3811 for (i=0;i<j;i++) { 3812 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3813 } 3814 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3815 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3816 } 3817 /* shift the pattern on coarse candidates */ 3818 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3819 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3820 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3821 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3822 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3823 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3824 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3825 } 3826 } 3827 if (pcbddc->dbg_flag) { 3828 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3829 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3830 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3831 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3832 } 3833 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3834 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3835 } else { 3836 if (pcbddc->dbg_flag) { 3837 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3838 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3840 } 3841 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3842 coarse_mat_is = t_coarse_mat_is; 3843 } 3844 3845 /* create local to global scatters for coarse problem */ 3846 if (compute_vecs) { 3847 PetscInt lrows; 3848 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3849 if (coarse_mat_is) { 3850 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3851 } else { 3852 lrows = 0; 3853 } 3854 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3855 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3856 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3857 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3858 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3859 } 3860 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3861 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3862 3863 /* set defaults for coarse KSP and PC */ 3864 if (multilevel_allowed) { 3865 coarse_ksp_type = KSPRICHARDSON; 3866 coarse_pc_type = PCBDDC; 3867 } else { 3868 coarse_ksp_type = KSPPREONLY; 3869 coarse_pc_type = PCREDUNDANT; 3870 } 3871 3872 /* print some info if requested */ 3873 if (pcbddc->dbg_flag) { 3874 if (!multilevel_allowed) { 3875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3876 if (multilevel_requested) { 3877 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3878 } else if (pcbddc->max_levels) { 3879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3880 } 3881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3882 } 3883 } 3884 3885 /* create the coarse KSP object only once with defaults */ 3886 if (coarse_mat_is) { 3887 MatReuse coarse_mat_reuse; 3888 PetscViewer dbg_viewer = NULL; 3889 if (pcbddc->dbg_flag) { 3890 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3891 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3892 } 3893 if (!pcbddc->coarse_ksp) { 3894 char prefix[256],str_level[16]; 3895 size_t len; 3896 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3897 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3898 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3899 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3900 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3901 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3902 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3903 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3904 /* prefix */ 3905 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3906 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3907 if (!pcbddc->current_level) { 3908 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3909 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3910 } else { 3911 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3912 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3913 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3914 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3915 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3916 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3917 } 3918 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3919 /* allow user customization */ 3920 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3921 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3922 } 3923 3924 /* get some info after set from options */ 3925 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3926 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3927 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3928 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3929 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3930 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3931 isbddc = PETSC_FALSE; 3932 } 3933 if (isredundant) { 3934 KSP inner_ksp; 3935 PC inner_pc; 3936 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3937 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3938 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3939 } 3940 3941 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3942 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3943 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3944 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3945 if (nisdofs) { 3946 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3947 for (i=0;i<nisdofs;i++) { 3948 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3949 } 3950 } 3951 if (nisneu) { 3952 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3953 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3954 } 3955 3956 /* assemble coarse matrix */ 3957 if (coarse_reuse) { 3958 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3959 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3960 coarse_mat_reuse = MAT_REUSE_MATRIX; 3961 } else { 3962 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3963 } 3964 if (isbddc || isnn) { 3965 if (pcbddc->coarsening_ratio > 1) { 3966 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3967 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3968 if (pcbddc->dbg_flag) { 3969 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3970 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3971 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3972 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3973 } 3974 } 3975 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3976 } else { 3977 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3978 coarse_mat = coarse_mat_is; 3979 } 3980 } else { 3981 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3982 } 3983 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3984 3985 /* propagate symmetry info to coarse matrix */ 3986 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3987 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3988 3989 /* set operators */ 3990 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3991 if (pcbddc->dbg_flag) { 3992 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3993 } 3994 } else { /* processes non partecipating to coarse solver (if any) */ 3995 coarse_mat = 0; 3996 } 3997 ierr = PetscFree(isarray);CHKERRQ(ierr); 3998 #if 0 3999 { 4000 PetscViewer viewer; 4001 char filename[256]; 4002 sprintf(filename,"coarse_mat.m"); 4003 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4004 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4005 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4006 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4007 } 4008 #endif 4009 4010 /* Compute coarse null space (special handling by BDDC only) */ 4011 if (pcbddc->NullSpace) { 4012 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4013 } 4014 4015 if (pcbddc->coarse_ksp) { 4016 Vec crhs,csol; 4017 PetscBool ispreonly; 4018 if (CoarseNullSpace) { 4019 if (isbddc) { 4020 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4021 } else { 4022 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4023 } 4024 } 4025 /* setup coarse ksp */ 4026 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4027 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4028 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4029 /* hack */ 4030 if (!csol) { 4031 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4032 } 4033 if (!crhs) { 4034 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4035 } 4036 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4037 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4038 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4039 KSP check_ksp; 4040 KSPType check_ksp_type; 4041 PC check_pc; 4042 Vec check_vec,coarse_vec; 4043 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4044 PetscInt its; 4045 PetscBool compute_eigs; 4046 PetscReal *eigs_r,*eigs_c; 4047 PetscInt neigs; 4048 const char *prefix; 4049 4050 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4051 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4052 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4053 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4054 if (ispreonly) { 4055 check_ksp_type = KSPPREONLY; 4056 compute_eigs = PETSC_FALSE; 4057 } else { 4058 check_ksp_type = KSPGMRES; 4059 compute_eigs = PETSC_TRUE; 4060 } 4061 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4062 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4063 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4064 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4065 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4066 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4067 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4068 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4069 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4070 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4071 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4072 /* create random vec */ 4073 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4074 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4075 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4076 if (CoarseNullSpace) { 4077 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4078 } 4079 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4080 /* solve coarse problem */ 4081 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4082 if (CoarseNullSpace) { 4083 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4084 } 4085 /* set eigenvalue estimation if preonly has not been requested */ 4086 if (compute_eigs) { 4087 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4088 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4089 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4090 lambda_max = eigs_r[neigs-1]; 4091 lambda_min = eigs_r[0]; 4092 if (pcbddc->use_coarse_estimates) { 4093 if (lambda_max>lambda_min) { 4094 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4095 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4096 } 4097 } 4098 } 4099 4100 /* check coarse problem residual error */ 4101 if (pcbddc->dbg_flag) { 4102 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4103 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4104 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4105 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4106 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4107 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4108 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4109 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4110 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4111 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4112 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4113 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4114 if (compute_eigs) { 4115 PetscReal lambda_max_s,lambda_min_s; 4116 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4117 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4118 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4119 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4120 for (i=0;i<neigs;i++) { 4121 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4122 } 4123 } 4124 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4125 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4126 } 4127 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4128 if (compute_eigs) { 4129 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4130 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4131 } 4132 } 4133 } 4134 /* print additional info */ 4135 if (pcbddc->dbg_flag) { 4136 /* waits until all processes reaches this point */ 4137 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4138 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4140 } 4141 4142 /* free memory */ 4143 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4144 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4145 PetscFunctionReturn(0); 4146 } 4147 4148 #undef __FUNCT__ 4149 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4150 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4151 { 4152 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4153 PC_IS* pcis = (PC_IS*)pc->data; 4154 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4155 PetscInt i,coarse_size; 4156 PetscInt *local_primal_indices; 4157 PetscErrorCode ierr; 4158 4159 PetscFunctionBegin; 4160 /* Compute global number of coarse dofs */ 4161 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4162 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4163 } 4164 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4165 4166 /* check numbering */ 4167 if (pcbddc->dbg_flag) { 4168 PetscScalar coarsesum,*array; 4169 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4170 4171 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4172 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4173 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4174 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4175 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4176 for (i=0;i<pcbddc->local_primal_size;i++) { 4177 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4178 } 4179 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4180 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4181 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4182 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4183 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4184 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4185 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4186 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4187 for (i=0;i<pcis->n;i++) { 4188 if (array[i] == 1.0) { 4189 set_error = PETSC_TRUE; 4190 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4191 } 4192 } 4193 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4194 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4195 for (i=0;i<pcis->n;i++) { 4196 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4197 } 4198 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4199 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4200 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4201 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4202 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4203 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4204 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4205 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4208 for (i=0;i<pcbddc->local_primal_size;i++) { 4209 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4210 } 4211 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4212 } 4213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4214 if (set_error_reduced) { 4215 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4216 } 4217 } 4218 /* get back data */ 4219 *coarse_size_n = coarse_size; 4220 *local_primal_indices_n = local_primal_indices; 4221 PetscFunctionReturn(0); 4222 } 4223 4224 #undef __FUNCT__ 4225 #define __FUNCT__ "PCBDDCGlobalToLocal" 4226 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4227 { 4228 IS localis_t; 4229 PetscInt i,lsize,*idxs,n; 4230 PetscScalar *vals; 4231 PetscErrorCode ierr; 4232 4233 PetscFunctionBegin; 4234 /* get indices in local ordering exploiting local to global map */ 4235 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4236 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4237 for (i=0;i<lsize;i++) vals[i] = 1.0; 4238 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4239 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4240 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4241 if (idxs) { /* multilevel guard */ 4242 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4243 } 4244 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4245 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4246 ierr = PetscFree(vals);CHKERRQ(ierr); 4247 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4248 /* now compute set in local ordering */ 4249 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4250 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4251 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4252 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4253 for (i=0,lsize=0;i<n;i++) { 4254 if (PetscRealPart(vals[i]) > 0.5) { 4255 lsize++; 4256 } 4257 } 4258 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4259 for (i=0,lsize=0;i<n;i++) { 4260 if (PetscRealPart(vals[i]) > 0.5) { 4261 idxs[lsize++] = i; 4262 } 4263 } 4264 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4265 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4266 *localis = localis_t; 4267 PetscFunctionReturn(0); 4268 } 4269 4270 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4271 #undef __FUNCT__ 4272 #define __FUNCT__ "PCBDDCMatMult_Private" 4273 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4274 { 4275 PCBDDCChange_ctx change_ctx; 4276 PetscErrorCode ierr; 4277 4278 PetscFunctionBegin; 4279 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4280 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4281 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4282 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4283 PetscFunctionReturn(0); 4284 } 4285 4286 #undef __FUNCT__ 4287 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4288 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4289 { 4290 PCBDDCChange_ctx change_ctx; 4291 PetscErrorCode ierr; 4292 4293 PetscFunctionBegin; 4294 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4295 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4296 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4297 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4298 PetscFunctionReturn(0); 4299 } 4300 4301 #undef __FUNCT__ 4302 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4303 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc, PetscInt layers, PetscBool use_useradj, PetscBool compute_Stilda) 4304 { 4305 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4306 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4307 PetscInt *used_xadj,*used_adjncy; 4308 PetscBool free_used_adj; 4309 PetscErrorCode ierr; 4310 4311 PetscFunctionBegin; 4312 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4313 free_used_adj = PETSC_FALSE; 4314 if (layers == -1) { 4315 used_xadj = NULL; 4316 used_adjncy = NULL; 4317 } else { 4318 if ( (use_useradj && pcbddc->mat_graph->xadj) || pcbddc->computed_rowadj) { 4319 used_xadj = pcbddc->mat_graph->xadj; 4320 used_adjncy = pcbddc->mat_graph->adjncy; 4321 } else { 4322 Mat mat_adj; 4323 PetscBool flg_row=PETSC_TRUE; 4324 const PetscInt *xadj,*adjncy; 4325 PetscInt nvtxs; 4326 4327 ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 4328 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4329 if (!flg_row) { 4330 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 4331 } 4332 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4333 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4334 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4335 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4336 if (!flg_row) { 4337 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 4338 } 4339 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 4340 free_used_adj = PETSC_TRUE; 4341 } 4342 } 4343 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,layers,compute_Stilda);CHKERRQ(ierr); 4344 4345 /* free adjacency */ 4346 if (free_used_adj) { 4347 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4348 } 4349 PetscFunctionReturn(0); 4350 } 4351 4352 #undef __FUNCT__ 4353 #define __FUNCT__ "PCBDDCInitSubSchurs" 4354 PetscErrorCode PCBDDCInitSubSchurs(PC pc, PetscBool rebuild, PetscInt threshold) 4355 { 4356 PC_IS *pcis=(PC_IS*)pc->data; 4357 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4358 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4359 PCBDDCGraph graph; 4360 Mat S_j; 4361 PetscErrorCode ierr; 4362 4363 PetscFunctionBegin; 4364 /* attach interface graph for determining subsets */ 4365 if (rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4366 IS verticesIS; 4367 4368 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4369 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4370 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4371 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4372 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4373 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4374 /* 4375 if (pcbddc->dbg_flag) { 4376 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4377 } 4378 */ 4379 } else { 4380 graph = pcbddc->mat_graph; 4381 } 4382 4383 /* Create Schur complement matrix */ 4384 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4385 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4386 4387 /* sub_schurs init */ 4388 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,threshold);CHKERRQ(ierr); 4389 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4390 /* free graph struct */ 4391 if (rebuild) { 4392 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4393 } 4394 PetscFunctionReturn(0); 4395 } 4396