1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #if 0 9 #undef __FUNCT__ 10 #define __FUNCT__ "PCBDDCAdaptiveSelection" 11 PetscErrorCode PCBDDCAdaptiveSelection(PC pc, MatNullSpace* adapt_nnsp) 12 { 13 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs[0],pcbddc->mat_graph,...);CHKERRQ(ierr); 18 PetscFunctionReturn(0); 19 } 20 #endif 21 22 #undef __FUNCT__ 23 #define __FUNCT__ "PCBDDCSetUpSolvers" 24 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 25 { 26 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 27 PetscScalar *coarse_submat_vals; 28 PetscErrorCode ierr; 29 30 PetscFunctionBegin; 31 /* Setup local scatters R_to_B and (optionally) R_to_D */ 32 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 33 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 34 35 /* Setup local neumann solver ksp_R */ 36 /* PCBDDCSetUpLocalScatters should be called first! */ 37 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 38 39 /* Change global null space passed in by the user if change of basis has been requested */ 40 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 41 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 42 } 43 44 /* 45 Setup local correction and local part of coarse basis. 46 Gives back the dense local part of the coarse matrix in column major ordering 47 */ 48 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 49 50 /* Compute total number of coarse nodes and setup coarse solver */ 51 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 52 53 /* free */ 54 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "PCBDDCResetCustomization" 60 PetscErrorCode PCBDDCResetCustomization(PC pc) 61 { 62 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 63 PetscErrorCode ierr; 64 65 PetscFunctionBegin; 66 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 67 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 68 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 69 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 70 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 71 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 72 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 73 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 74 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 75 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 76 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetTopography" 82 PetscErrorCode PCBDDCResetTopography(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 89 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 91 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 92 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[0]);CHKERRQ(ierr); 93 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs[1]);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCResetSolvers" 99 PetscErrorCode PCBDDCResetSolvers(PC pc) 100 { 101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 102 PetscErrorCode ierr; 103 104 PetscFunctionBegin; 105 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 106 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 108 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 110 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 111 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 112 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 113 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 114 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 115 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 116 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 117 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 118 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 119 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 120 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 121 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 122 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 123 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 124 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 125 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 126 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 127 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 128 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 134 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 135 { 136 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 137 PC_IS *pcis = (PC_IS*)pc->data; 138 VecType impVecType; 139 PetscInt n_constraints,n_R,old_size; 140 PetscErrorCode ierr; 141 142 PetscFunctionBegin; 143 if (!pcbddc->ConstraintMatrix) { 144 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 145 } 146 /* get sizes */ 147 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 148 n_R = pcis->n-pcbddc->n_actual_vertices; 149 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 150 /* local work vectors (try to avoid unneeded work)*/ 151 /* R nodes */ 152 old_size = -1; 153 if (pcbddc->vec1_R) { 154 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 155 } 156 if (n_R != old_size) { 157 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 158 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 159 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 160 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 161 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 162 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 163 } 164 /* local primal dofs */ 165 old_size = -1; 166 if (pcbddc->vec1_P) { 167 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 168 } 169 if (pcbddc->local_primal_size != old_size) { 170 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 171 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 172 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 173 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 174 } 175 /* local explicit constraints */ 176 old_size = -1; 177 if (pcbddc->vec1_C) { 178 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 179 } 180 if (n_constraints && n_constraints != old_size) { 181 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 182 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 183 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 184 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "PCBDDCSetUpCorrection" 191 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 192 { 193 PetscErrorCode ierr; 194 /* pointers to pcis and pcbddc */ 195 PC_IS* pcis = (PC_IS*)pc->data; 196 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 197 /* submatrices of local problem */ 198 Mat A_RV,A_VR,A_VV; 199 /* working matrices */ 200 Mat M1,M2,M3,C_CR; 201 /* working vectors */ 202 Vec vec1_C,vec2_C,vec1_V,vec2_V; 203 /* additional working stuff */ 204 IS is_aux; 205 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 206 const PetscScalar *array,*row_cmat_values; 207 const PetscInt *row_cmat_indices,*idx_R_local; 208 PetscInt *idx_V_B,*auxindices; 209 PetscInt n_vertices,n_constraints,size_of_constraint; 210 PetscInt i,j,n_R,n_D,n_B; 211 PetscBool unsymmetric_check; 212 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 213 MatType impMatType; 214 /* some shortcuts to scalars */ 215 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 216 /* for debugging purposes */ 217 PetscReal *coarsefunctions_errors,*constraints_errors; 218 219 PetscFunctionBegin; 220 /* get number of vertices (corners plus constraints with change of basis) 221 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 222 n_vertices = pcbddc->n_actual_vertices; 223 n_constraints = pcbddc->local_primal_size-n_vertices; 224 /* Set Non-overlapping dimensions */ 225 n_B = pcis->n_B; n_D = pcis->n - n_B; 226 n_R = pcis->n-n_vertices; 227 228 /* Set types for local objects needed by BDDC precondtioner */ 229 impMatType = MATSEQDENSE; 230 231 /* Allocating some extra storage just to be safe */ 232 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 233 for (i=0;i<pcis->n;i++) auxindices[i]=i; 234 235 /* vertices in boundary numbering */ 236 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 237 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 238 if (i != n_vertices) { 239 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 240 } 241 242 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 243 if (n_constraints) { 244 /* see if we can save some allocations */ 245 if (pcbddc->local_auxmat2) { 246 PetscInt on_R,on_constraints; 247 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 248 if (on_R != n_R || on_constraints != n_constraints) { 249 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 250 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 251 } 252 } 253 /* work vectors */ 254 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 255 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 256 /* auxiliary matrices */ 257 if (!pcbddc->local_auxmat2) { 258 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 259 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 260 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 261 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 262 } 263 264 /* Extract constraints on R nodes: C_{CR} */ 265 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 266 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 267 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 268 269 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 270 for (i=0;i<n_constraints;i++) { 271 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 272 /* Get row of constraint matrix in R numbering */ 273 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 274 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 275 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 276 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 277 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 278 /* Solve for row of constraint matrix in R numbering */ 279 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 280 /* Set values in local_auxmat2 */ 281 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 282 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 284 } 285 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 286 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 287 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 288 289 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 290 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 291 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 292 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 293 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 294 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 295 ierr = MatSetUp(M1);CHKERRQ(ierr); 296 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 297 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 298 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 299 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 300 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 301 ierr = MatDestroy(&M2);CHKERRQ(ierr); 302 ierr = MatDestroy(&M3);CHKERRQ(ierr); 303 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 304 if (!pcbddc->local_auxmat1) { 305 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 306 } else { 307 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 308 } 309 } 310 311 /* Get submatrices from subdomain matrix */ 312 if (n_vertices) { 313 PetscInt ibs,mbs; 314 PetscBool issbaij; 315 Mat newmat; 316 317 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 318 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 319 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 320 if (ibs != mbs) { /* need to convert to SEQAIJ */ 321 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 322 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 325 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 326 } else { 327 /* this is safe */ 328 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 329 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 330 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 331 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 332 /* which of the two approaches is faster? */ 333 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 334 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 335 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 336 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 337 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 338 } else { 339 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 340 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 341 } 342 } 343 ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 344 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 345 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 346 } 347 348 /* Matrix of coarse basis functions (local) */ 349 if (pcbddc->coarse_phi_B) { 350 PetscInt on_B,on_primal; 351 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 352 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 353 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 355 } 356 } 357 if (pcbddc->coarse_phi_D) { 358 PetscInt on_D,on_primal; 359 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 360 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 361 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 362 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 363 } 364 } 365 if (!pcbddc->coarse_phi_B) { 366 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 367 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 368 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 369 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 370 } 371 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 372 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 373 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 374 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 375 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 376 } 377 378 if (pcbddc->dbg_flag) { 379 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 380 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 381 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 382 } 383 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 384 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 385 386 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 387 388 /* vertices */ 389 for (i=0;i<n_vertices;i++) { 390 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 391 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 392 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 393 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 394 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 395 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 396 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 397 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 398 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 399 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 400 if (n_constraints) { 401 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 402 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 403 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 404 } 405 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 406 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 407 408 /* Set values in coarse basis function and subdomain part of coarse_mat */ 409 /* coarse basis functions */ 410 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 411 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 416 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 417 if (pcbddc->switch_static || pcbddc->dbg_flag) { 418 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 419 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 420 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 421 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 422 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 423 } 424 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 425 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 428 if (n_constraints) { 429 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 432 } 433 434 /* check */ 435 if (pcbddc->dbg_flag) { 436 /* assemble subdomain vector on local nodes */ 437 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 438 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 439 if (n_R) { 440 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 441 } 442 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 443 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 444 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 445 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 446 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 447 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 448 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 449 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 451 if (n_constraints) { 452 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 453 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 454 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 455 } 456 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 457 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 458 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 459 /* check saddle point solution */ 460 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 461 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 462 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 463 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 464 /* shift by the identity matrix */ 465 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 466 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 467 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 468 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 469 } 470 } 471 472 /* constraints */ 473 for (i=0;i<n_constraints;i++) { 474 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 475 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 476 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 477 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 478 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 479 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 480 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 482 if (n_vertices) { 483 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 484 } 485 /* Set values in coarse basis function and subdomain part of coarse_mat */ 486 /* coarse basis functions */ 487 j = i+n_vertices; /* don't touch this! */ 488 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 489 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 490 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 491 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 492 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 493 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 494 if (pcbddc->switch_static || pcbddc->dbg_flag) { 495 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 496 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 497 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 498 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 500 } 501 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 511 if (pcbddc->dbg_flag) { 512 /* assemble subdomain vector on nodes */ 513 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 514 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 515 if (n_R) { 516 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 517 } 518 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 519 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 520 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 521 /* assemble subdomain vector of lagrange multipliers */ 522 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 523 if (n_vertices) { 524 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 525 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 526 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 527 } 528 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 529 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 530 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 531 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 532 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 533 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 534 /* check saddle point solution */ 535 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 536 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 537 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 538 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 539 /* shift by the identity matrix */ 540 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 541 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 543 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 544 } 545 } 546 /* call assembling routines for local coarse basis */ 547 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 if (pcbddc->switch_static || pcbddc->dbg_flag) { 550 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 } 553 554 /* compute other basis functions for non-symmetric problems */ 555 if (!pcbddc->issym) { 556 if (!pcbddc->coarse_psi_B) { 557 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 558 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 560 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 561 } 562 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 563 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 564 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 566 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 567 } 568 for (i=0;i<pcbddc->local_primal_size;i++) { 569 if (n_constraints) { 570 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 571 for (j=0;j<n_constraints;j++) { 572 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 573 } 574 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 575 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 576 } 577 if (i<n_vertices) { 578 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 579 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 580 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 581 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 582 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 583 if (n_constraints) { 584 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 585 } 586 } else { 587 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 588 } 589 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 590 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 591 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 592 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 593 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 594 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 595 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 596 if (i<n_vertices) { 597 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 598 } 599 if (pcbddc->switch_static || pcbddc->dbg_flag) { 600 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 601 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 602 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 603 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 604 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 605 } 606 607 if (pcbddc->dbg_flag) { 608 /* assemble subdomain vector on nodes */ 609 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 610 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 611 if (n_R) { 612 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 613 } 614 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 615 if (i<n_vertices) { 616 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 617 } 618 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 619 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 620 /* assemble subdomain vector of lagrange multipliers */ 621 for (j=0;j<pcbddc->local_primal_size;j++) { 622 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 623 } 624 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 625 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 626 /* check saddle point solution */ 627 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 628 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 629 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 630 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 631 /* shift by the identity matrix */ 632 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 633 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 634 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 635 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 636 } 637 } 638 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 639 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 640 if (pcbddc->switch_static || pcbddc->dbg_flag) { 641 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 642 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 643 } 644 unsymmetric_check = PETSC_TRUE; 645 } else { /* take references to already computed coarse basis */ 646 unsymmetric_check = PETSC_FALSE; 647 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 648 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 649 if (pcbddc->coarse_phi_D) { 650 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 651 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 652 } 653 } 654 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 655 /* Checking coarse_sub_mat and coarse basis functios */ 656 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 657 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 658 if (pcbddc->dbg_flag) { 659 Mat coarse_sub_mat; 660 Mat AUXMAT,TM1,TM2,TM3,TM4; 661 Mat coarse_phi_D,coarse_phi_B; 662 Mat coarse_psi_D,coarse_psi_B; 663 Mat A_II,A_BB,A_IB,A_BI; 664 MatType checkmattype=MATSEQAIJ; 665 PetscReal real_value; 666 667 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 668 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 669 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 670 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 671 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 672 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 673 if (unsymmetric_check) { 674 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 675 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 676 } 677 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 678 679 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 680 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 681 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 682 if (unsymmetric_check) { 683 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 684 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 685 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 686 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 687 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 688 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 689 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 690 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 691 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 692 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 693 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 694 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 695 } else { 696 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 697 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 698 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 699 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 700 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 701 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 702 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 703 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 704 } 705 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 706 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 707 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 708 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 709 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 710 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 711 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 716 for (i=0;i<pcbddc->local_primal_size;i++) { 717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 718 } 719 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 720 for (i=0;i<pcbddc->local_primal_size;i++) { 721 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 722 } 723 if (unsymmetric_check) { 724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 725 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 726 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 727 } 728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 729 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 730 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 731 } 732 } 733 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 734 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 738 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 739 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 740 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 741 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 742 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 743 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 744 if (unsymmetric_check) { 745 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 746 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 747 } 748 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 749 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 750 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 751 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 752 } 753 /* free memory */ 754 if (n_vertices) { 755 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 756 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 757 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 758 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 759 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 760 } 761 if (n_constraints) { 762 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 763 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 764 ierr = MatDestroy(&M1);CHKERRQ(ierr); 765 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 766 } 767 ierr = PetscFree(auxindices);CHKERRQ(ierr); 768 /* get back data */ 769 *coarse_submat_vals_n = coarse_submat_vals; 770 PetscFunctionReturn(0); 771 } 772 773 #undef __FUNCT__ 774 #define __FUNCT__ "MatGetSubMatrixUnsorted" 775 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 776 { 777 Mat *work_mat; 778 IS isrow_s,iscol_s; 779 PetscBool rsorted,csorted; 780 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 781 PetscErrorCode ierr; 782 783 PetscFunctionBegin; 784 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 785 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 786 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 787 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 788 789 if (!rsorted) { 790 const PetscInt *idxs; 791 PetscInt *idxs_sorted,i; 792 793 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 794 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 795 for (i=0;i<rsize;i++) { 796 idxs_perm_r[i] = i; 797 } 798 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 799 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 800 for (i=0;i<rsize;i++) { 801 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 802 } 803 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 804 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 805 } else { 806 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 807 isrow_s = isrow; 808 } 809 810 if (!csorted) { 811 if (isrow == iscol) { 812 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 813 iscol_s = isrow_s; 814 } else { 815 const PetscInt *idxs; 816 PetscInt *idxs_sorted,i; 817 818 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 819 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 820 for (i=0;i<csize;i++) { 821 idxs_perm_c[i] = i; 822 } 823 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 824 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 825 for (i=0;i<csize;i++) { 826 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 827 } 828 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 829 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 830 } 831 } else { 832 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 833 iscol_s = iscol; 834 } 835 836 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 837 838 if (!rsorted || !csorted) { 839 Mat new_mat; 840 IS is_perm_r,is_perm_c; 841 842 if (!rsorted) { 843 PetscInt *idxs_r,i; 844 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 845 for (i=0;i<rsize;i++) { 846 idxs_r[idxs_perm_r[i]] = i; 847 } 848 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 849 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 850 } else { 851 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 852 } 853 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 854 855 if (!csorted) { 856 if (isrow_s == iscol_s) { 857 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 858 is_perm_c = is_perm_r; 859 } else { 860 PetscInt *idxs_c,i; 861 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 862 for (i=0;i<csize;i++) { 863 idxs_c[idxs_perm_c[i]] = i; 864 } 865 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 866 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 867 } 868 } else { 869 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 870 } 871 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 872 873 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 874 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 875 work_mat[0] = new_mat; 876 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 877 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 878 } 879 880 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 881 *B = work_mat[0]; 882 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 883 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 884 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 885 PetscFunctionReturn(0); 886 } 887 888 #undef __FUNCT__ 889 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 890 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 891 { 892 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 893 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 894 Mat new_mat; 895 IS is_local,is_global; 896 PetscInt local_size; 897 PetscBool isseqaij; 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 902 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 903 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 904 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 905 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 906 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 907 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 908 909 /* check */ 910 if (pcbddc->dbg_flag) { 911 Vec x,x_change; 912 PetscReal error; 913 914 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 915 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 916 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 917 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 919 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 920 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 921 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 922 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 923 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 924 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 925 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 926 ierr = VecDestroy(&x);CHKERRQ(ierr); 927 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 928 } 929 930 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 931 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 932 if (isseqaij) { 933 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 934 } else { 935 Mat work_mat; 936 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 937 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 938 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 939 } 940 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 941 /* 942 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 943 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 944 */ 945 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 951 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 952 { 953 PC_IS* pcis = (PC_IS*)(pc->data); 954 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 955 IS is_aux1,is_aux2; 956 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 957 PetscInt n_vertices,i,j,n_R,n_D,n_B; 958 PetscInt vbs,bs; 959 PetscBT bitmask; 960 PetscErrorCode ierr; 961 962 PetscFunctionBegin; 963 /* 964 No need to setup local scatters if 965 - primal space is unchanged 966 AND 967 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 968 AND 969 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 970 */ 971 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 972 PetscFunctionReturn(0); 973 } 974 /* destroy old objects */ 975 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 977 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 978 /* Set Non-overlapping dimensions */ 979 n_B = pcis->n_B; n_D = pcis->n - n_B; 980 n_vertices = pcbddc->n_actual_vertices; 981 /* create auxiliary bitmask */ 982 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 983 for (i=0;i<n_vertices;i++) { 984 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 985 } 986 987 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 988 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 989 for (i=0, n_R=0; i<pcis->n; i++) { 990 if (!PetscBTLookup(bitmask,i)) { 991 idx_R_local[n_R] = i; 992 n_R++; 993 } 994 } 995 996 /* Block code */ 997 vbs = 1; 998 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 999 if (bs>1 && !(n_vertices%bs)) { 1000 PetscBool is_blocked = PETSC_TRUE; 1001 PetscInt *vary; 1002 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1003 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1004 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1005 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1006 for (i=0; i<n_vertices; i++) { 1007 if (vary[i]!=0 && vary[i]!=bs) { 1008 is_blocked = PETSC_FALSE; 1009 break; 1010 } 1011 } 1012 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1013 vbs = bs; 1014 for (i=0;i<n_R/vbs;i++) { 1015 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1016 } 1017 } 1018 ierr = PetscFree(vary);CHKERRQ(ierr); 1019 } 1020 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1021 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1022 1023 /* print some info if requested */ 1024 if (pcbddc->dbg_flag) { 1025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1026 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1027 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1031 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1032 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1033 } 1034 1035 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1036 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1038 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1039 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1040 for (i=0; i<n_D; i++) { 1041 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1042 } 1043 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1044 for (i=0, j=0; i<n_R; i++) { 1045 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1046 aux_array1[j++] = i; 1047 } 1048 } 1049 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1050 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1051 for (i=0, j=0; i<n_B; i++) { 1052 if (!PetscBTLookup(bitmask,is_indices[i])) { 1053 aux_array2[j++] = i; 1054 } 1055 } 1056 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1057 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1058 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1059 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1060 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1061 1062 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1063 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1064 for (i=0, j=0; i<n_R; i++) { 1065 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1066 aux_array1[j++] = i; 1067 } 1068 } 1069 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1070 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1071 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1072 } 1073 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1074 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1081 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1082 { 1083 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1084 PC_IS *pcis = (PC_IS*)pc->data; 1085 PC pc_temp; 1086 Mat A_RR; 1087 MatReuse reuse; 1088 PetscScalar m_one = -1.0; 1089 PetscReal value; 1090 PetscInt n_D,n_R,ibs,mbs; 1091 PetscBool use_exact,use_exact_reduced,issbaij; 1092 PetscErrorCode ierr; 1093 /* prefixes stuff */ 1094 char dir_prefix[256],neu_prefix[256],str_level[16]; 1095 size_t len; 1096 1097 PetscFunctionBegin; 1098 1099 /* compute prefixes */ 1100 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1101 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1102 if (!pcbddc->current_level) { 1103 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1104 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1105 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1106 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1107 } else { 1108 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1109 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1110 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1111 len -= 15; /* remove "pc_bddc_coarse_" */ 1112 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1113 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1114 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1115 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1116 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1117 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1118 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1119 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1120 } 1121 1122 /* DIRICHLET PROBLEM */ 1123 if (dirichlet) { 1124 /* Matrix for Dirichlet problem is pcis->A_II */ 1125 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1126 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1127 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1128 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1129 /* default */ 1130 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1131 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1132 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1133 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1134 if (issbaij) { 1135 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1136 } else { 1137 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1138 } 1139 /* Allow user's customization */ 1140 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1141 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1142 } 1143 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1144 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1145 if (!n_D) { 1146 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1147 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1148 } 1149 /* Set Up KSP for Dirichlet problem of BDDC */ 1150 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1151 /* set ksp_D into pcis data */ 1152 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1153 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1154 pcis->ksp_D = pcbddc->ksp_D; 1155 } 1156 1157 /* NEUMANN PROBLEM */ 1158 A_RR = 0; 1159 if (neumann) { 1160 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1161 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1162 if (pcbddc->ksp_R) { /* already created ksp */ 1163 PetscInt nn_R; 1164 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1165 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1166 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1167 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1168 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1169 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1170 reuse = MAT_INITIAL_MATRIX; 1171 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1172 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1173 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1174 reuse = MAT_INITIAL_MATRIX; 1175 } else { /* safe to reuse the matrix */ 1176 reuse = MAT_REUSE_MATRIX; 1177 } 1178 } 1179 /* last check */ 1180 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1181 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1182 reuse = MAT_INITIAL_MATRIX; 1183 } 1184 } else { /* first time, so we need to create the matrix */ 1185 reuse = MAT_INITIAL_MATRIX; 1186 } 1187 /* extract A_RR */ 1188 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1189 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1190 if (ibs != mbs) { 1191 Mat newmat; 1192 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1193 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1194 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1195 } else { 1196 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1197 } 1198 if (!pcbddc->ksp_R) { /* create object if not present */ 1199 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1200 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1201 /* default */ 1202 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1203 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1204 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1205 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1206 if (issbaij) { 1207 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1208 } else { 1209 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1210 } 1211 /* Allow user's customization */ 1212 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1213 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1214 } 1215 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1216 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1217 if (!n_R) { 1218 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1219 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1220 } 1221 /* Set Up KSP for Neumann problem of BDDC */ 1222 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1223 } 1224 1225 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1226 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1227 if (pcbddc->dbg_flag) { 1228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1229 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1230 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1231 } 1232 if (dirichlet) { /* Dirichlet */ 1233 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1234 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1235 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1236 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1237 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1238 /* need to be adapted? */ 1239 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1240 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1241 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1242 /* print info */ 1243 if (pcbddc->dbg_flag) { 1244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1245 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1246 } 1247 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1248 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1249 } 1250 } 1251 if (neumann) { /* Neumann */ 1252 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1253 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1254 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1255 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1256 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1257 /* need to be adapted? */ 1258 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1259 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1260 /* print info */ 1261 if (pcbddc->dbg_flag) { 1262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1263 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1264 } 1265 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1266 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1267 } 1268 } 1269 } 1270 /* free Neumann problem's matrix */ 1271 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1272 PetscFunctionReturn(0); 1273 } 1274 1275 #undef __FUNCT__ 1276 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1277 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1278 { 1279 PetscErrorCode ierr; 1280 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1281 1282 PetscFunctionBegin; 1283 if (applytranspose) { 1284 if (pcbddc->local_auxmat1) { 1285 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1286 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1287 } 1288 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1289 } else { 1290 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1291 if (pcbddc->local_auxmat1) { 1292 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1293 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1294 } 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1302 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1303 { 1304 PetscErrorCode ierr; 1305 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1306 PC_IS* pcis = (PC_IS*) (pc->data); 1307 const PetscScalar zero = 0.0; 1308 1309 PetscFunctionBegin; 1310 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1311 if (applytranspose) { 1312 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1313 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1314 } else { 1315 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1316 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1317 } 1318 /* start communications from local primal nodes to rhs of coarse solver */ 1319 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1320 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1321 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1322 1323 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1324 /* TODO remove null space when doing multilevel */ 1325 if (pcbddc->coarse_ksp) { 1326 if (applytranspose) { 1327 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1328 } else { 1329 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1330 } 1331 } 1332 1333 /* Local solution on R nodes */ 1334 if (pcis->n) { 1335 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1336 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1337 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1338 if (pcbddc->switch_static) { 1339 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1340 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1341 } 1342 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1343 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1344 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 if (pcbddc->switch_static) { 1347 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1348 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1349 } 1350 } 1351 1352 /* communications from coarse sol to local primal nodes */ 1353 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1355 1356 /* Sum contributions from two levels */ 1357 if (applytranspose) { 1358 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1359 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1360 } else { 1361 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1362 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1363 } 1364 PetscFunctionReturn(0); 1365 } 1366 1367 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1370 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1371 { 1372 PetscErrorCode ierr; 1373 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1374 PetscScalar *array,*array2; 1375 Vec from,to; 1376 1377 PetscFunctionBegin; 1378 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1379 from = pcbddc->coarse_vec; 1380 to = pcbddc->vec1_P; 1381 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1382 Vec tvec; 1383 PetscInt lsize; 1384 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1385 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1386 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1387 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1388 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1389 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1390 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1391 } 1392 } else { /* from local to global -> put data in coarse right hand side */ 1393 from = pcbddc->vec1_P; 1394 to = pcbddc->coarse_vec; 1395 } 1396 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1397 PetscFunctionReturn(0); 1398 } 1399 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1402 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1403 { 1404 PetscErrorCode ierr; 1405 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1406 PetscScalar *array,*array2; 1407 Vec from,to; 1408 1409 PetscFunctionBegin; 1410 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1411 from = pcbddc->coarse_vec; 1412 to = pcbddc->vec1_P; 1413 } else { /* from local to global -> put data in coarse right hand side */ 1414 from = pcbddc->vec1_P; 1415 to = pcbddc->coarse_vec; 1416 } 1417 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1418 if (smode == SCATTER_FORWARD) { 1419 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1420 Vec tvec; 1421 PetscInt lsize; 1422 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1423 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1424 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1425 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1426 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1427 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1428 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1429 } 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 /* uncomment for testing purposes */ 1435 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1438 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1439 { 1440 PetscErrorCode ierr; 1441 PC_IS* pcis = (PC_IS*)(pc->data); 1442 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1443 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1444 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1445 MatType impMatType=MATSEQAIJ; 1446 /* one and zero */ 1447 PetscScalar one=1.0,zero=0.0; 1448 /* space to store constraints and their local indices */ 1449 PetscScalar *temp_quadrature_constraint; 1450 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1451 /* iterators */ 1452 PetscInt i,j,k,total_counts,temp_start_ptr; 1453 /* stuff to store connected components stored in pcbddc->mat_graph */ 1454 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1455 PetscInt n_ISForFaces,n_ISForEdges; 1456 /* near null space stuff */ 1457 MatNullSpace nearnullsp; 1458 const Vec *nearnullvecs; 1459 Vec *localnearnullsp; 1460 PetscBool nnsp_has_cnst; 1461 PetscInt nnsp_size; 1462 PetscScalar *array; 1463 /* BLAS integers */ 1464 PetscBLASInt lwork,lierr; 1465 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1466 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1467 /* LAPACK working arrays for SVD or POD */ 1468 PetscBool skip_lapack; 1469 PetscScalar *work; 1470 PetscReal *singular_vals; 1471 #if defined(PETSC_USE_COMPLEX) 1472 PetscReal *rwork; 1473 #endif 1474 #if defined(PETSC_MISSING_LAPACK_GESVD) 1475 PetscBLASInt Blas_one_2=1; 1476 PetscScalar *temp_basis,*correlation_mat; 1477 #else 1478 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1479 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1480 #endif 1481 /* reuse */ 1482 PetscInt olocal_primal_size; 1483 PetscInt *oprimal_indices_local_idxs; 1484 /* change of basis */ 1485 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1486 PetscBool boolforchange,qr_needed; 1487 PetscBT touched,change_basis,qr_needed_idx; 1488 /* auxiliary stuff */ 1489 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1490 PetscInt ncc,*gidxs,*permutation,*temp_indices_to_constraint_work; 1491 PetscScalar *temp_quadrature_constraint_work; 1492 /* some quantities */ 1493 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1494 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1495 1496 1497 PetscFunctionBegin; 1498 /* Destroy Mat objects computed previously */ 1499 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1500 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1501 /* Get index sets for faces, edges and vertices from graph */ 1502 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1503 /* free unneeded index sets */ 1504 if (!pcbddc->use_vertices) { 1505 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1506 } 1507 if (!pcbddc->use_edges) { 1508 for (i=0;i<n_ISForEdges;i++) { 1509 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1510 } 1511 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1512 n_ISForEdges = 0; 1513 } 1514 if (!pcbddc->use_faces) { 1515 for (i=0;i<n_ISForFaces;i++) { 1516 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1517 } 1518 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1519 n_ISForFaces = 0; 1520 } 1521 /* HACKS (the following two blocks of code) */ 1522 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1523 pcbddc->use_change_of_basis = PETSC_TRUE; 1524 if (!ISForEdges) { 1525 pcbddc->use_change_on_faces = PETSC_TRUE; 1526 } 1527 } 1528 if (pcbddc->NullSpace) { 1529 /* use_change_of_basis should be consistent among processors */ 1530 PetscBool tbool[2],gbool[2]; 1531 tbool [0] = pcbddc->use_change_of_basis; 1532 tbool [1] = pcbddc->use_change_on_faces; 1533 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1534 pcbddc->use_change_of_basis = gbool[0]; 1535 pcbddc->use_change_on_faces = gbool[1]; 1536 } 1537 /* print some info */ 1538 if (pcbddc->dbg_flag) { 1539 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1541 i = 0; 1542 if (ISForVertices) { 1543 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1544 } 1545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1549 } 1550 /* check if near null space is attached to global mat */ 1551 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1552 if (nearnullsp) { 1553 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1554 /* remove any stored info */ 1555 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1556 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1557 /* store information for BDDC solver reuse */ 1558 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1559 pcbddc->onearnullspace = nearnullsp; 1560 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1561 for (i=0;i<nnsp_size;i++) { 1562 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1563 } 1564 } else { /* if near null space is not provided BDDC uses constants by default */ 1565 nnsp_size = 0; 1566 nnsp_has_cnst = PETSC_TRUE; 1567 } 1568 /* get max number of constraints on a single cc */ 1569 max_constraints = nnsp_size; 1570 if (nnsp_has_cnst) max_constraints++; 1571 1572 /* 1573 Evaluate maximum storage size needed by the procedure 1574 - temp_indices will contain start index of each constraint stored as follows 1575 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1576 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1577 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1578 */ 1579 total_counts = n_ISForFaces+n_ISForEdges; 1580 total_counts *= max_constraints; 1581 n_vertices = 0; 1582 if (ISForVertices) { 1583 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1584 } 1585 total_counts += n_vertices; 1586 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1587 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1588 total_counts = 0; 1589 max_size_of_constraint = 0; 1590 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1591 if (i<n_ISForEdges) { 1592 used_IS = &ISForEdges[i]; 1593 } else { 1594 used_IS = &ISForFaces[i-n_ISForEdges]; 1595 } 1596 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1597 total_counts += j; 1598 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1599 } 1600 total_counts *= max_constraints; 1601 total_counts += n_vertices; 1602 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1603 /* get local part of global near null space vectors */ 1604 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1605 for (k=0;k<nnsp_size;k++) { 1606 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1607 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 } 1610 1611 /* whether or not to skip lapack calls */ 1612 skip_lapack = PETSC_TRUE; 1613 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1614 1615 /* allocate some auxiliary stuff */ 1616 if (!skip_lapack || pcbddc->use_qr_single) { 1617 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1618 } 1619 1620 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1621 if (!skip_lapack) { 1622 PetscScalar temp_work; 1623 1624 #if defined(PETSC_MISSING_LAPACK_GESVD) 1625 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1626 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1627 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1628 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1629 #if defined(PETSC_USE_COMPLEX) 1630 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1631 #endif 1632 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1633 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1634 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1635 lwork = -1; 1636 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1637 #if !defined(PETSC_USE_COMPLEX) 1638 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1639 #else 1640 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1641 #endif 1642 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1643 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1644 #else /* on missing GESVD */ 1645 /* SVD */ 1646 PetscInt max_n,min_n; 1647 max_n = max_size_of_constraint; 1648 min_n = max_constraints; 1649 if (max_size_of_constraint < max_constraints) { 1650 min_n = max_size_of_constraint; 1651 max_n = max_constraints; 1652 } 1653 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1654 #if defined(PETSC_USE_COMPLEX) 1655 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1656 #endif 1657 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1658 lwork = -1; 1659 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1660 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1661 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1662 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1663 #if !defined(PETSC_USE_COMPLEX) 1664 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1665 #else 1666 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1667 #endif 1668 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1669 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1670 #endif /* on missing GESVD */ 1671 /* Allocate optimal workspace */ 1672 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1673 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1674 } 1675 /* Now we can loop on constraining sets */ 1676 total_counts = 0; 1677 temp_indices[0] = 0; 1678 /* vertices */ 1679 if (ISForVertices) { 1680 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1681 if (nnsp_has_cnst) { /* consider all vertices */ 1682 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1683 for (i=0;i<n_vertices;i++) { 1684 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1685 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1686 total_counts++; 1687 } 1688 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1689 PetscBool used_vertex; 1690 for (i=0;i<n_vertices;i++) { 1691 used_vertex = PETSC_FALSE; 1692 k = 0; 1693 while (!used_vertex && k<nnsp_size) { 1694 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1695 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1696 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1697 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1698 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1699 total_counts++; 1700 used_vertex = PETSC_TRUE; 1701 } 1702 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1703 k++; 1704 } 1705 } 1706 } 1707 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1708 n_vertices = total_counts; 1709 } 1710 1711 /* edges and faces */ 1712 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1713 if (ncc<n_ISForEdges) { 1714 used_IS = &ISForEdges[ncc]; 1715 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1716 } else { 1717 used_IS = &ISForFaces[ncc-n_ISForEdges]; 1718 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1719 } 1720 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1721 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1722 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1723 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1724 /* change of basis should not be performed on local periodic nodes */ 1725 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1726 if (nnsp_has_cnst) { 1727 PetscScalar quad_value; 1728 temp_constraints++; 1729 if (!pcbddc->use_nnsp_true) { 1730 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1731 } else { 1732 quad_value = 1.0; 1733 } 1734 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1735 for (j=0;j<size_of_constraint;j++) { 1736 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1737 } 1738 /* sort by global ordering if using lapack subroutines */ 1739 if (!skip_lapack || pcbddc->use_qr_single) { 1740 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1741 for (j=0;j<size_of_constraint;j++) { 1742 permutation[j]=j; 1743 } 1744 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1745 for (j=0;j<size_of_constraint;j++) { 1746 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1747 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1748 } 1749 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1750 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1751 } 1752 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1753 total_counts++; 1754 } 1755 for (k=0;k<nnsp_size;k++) { 1756 PetscReal real_value; 1757 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1758 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1759 for (j=0;j<size_of_constraint;j++) { 1760 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1761 } 1762 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1763 /* check if array is null on the connected component */ 1764 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1765 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1766 if (real_value > 0.0) { /* keep indices and values */ 1767 /* sort by global ordering if using lapack subroutines */ 1768 if (!skip_lapack || pcbddc->use_qr_single) { 1769 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 1770 for (j=0;j<size_of_constraint;j++) { 1771 permutation[j]=j; 1772 } 1773 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 1774 for (j=0;j<size_of_constraint;j++) { 1775 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 1776 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 1777 } 1778 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1779 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 1780 } 1781 temp_constraints++; 1782 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1783 total_counts++; 1784 } 1785 } 1786 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1787 valid_constraints = temp_constraints; 1788 if (!pcbddc->use_nnsp_true && temp_constraints) { 1789 if (temp_constraints == 1) { /* just normalize the constraint */ 1790 PetscScalar norm; 1791 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1792 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1793 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 1794 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 1795 } else { /* perform SVD */ 1796 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1797 1798 #if defined(PETSC_MISSING_LAPACK_GESVD) 1799 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1800 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1801 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1802 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1803 from that computed using LAPACKgesvd 1804 -> This is due to a different computation of eigenvectors in LAPACKheev 1805 -> The quality of the POD-computed basis will be the same */ 1806 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1807 /* Store upper triangular part of correlation matrix */ 1808 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1809 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1810 for (j=0;j<temp_constraints;j++) { 1811 for (k=0;k<j+1;k++) { 1812 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1813 } 1814 } 1815 /* compute eigenvalues and eigenvectors of correlation matrix */ 1816 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1817 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1818 #if !defined(PETSC_USE_COMPLEX) 1819 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1820 #else 1821 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1822 #endif 1823 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1824 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1825 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1826 j = 0; 1827 while (j < temp_constraints && singular_vals[j] < tol) j++; 1828 total_counts = total_counts-j; 1829 valid_constraints = temp_constraints-j; 1830 /* scale and copy POD basis into used quadrature memory */ 1831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1834 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1835 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1836 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1837 if (j<temp_constraints) { 1838 PetscInt ii; 1839 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1840 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1841 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1842 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1843 for (k=0;k<temp_constraints-j;k++) { 1844 for (ii=0;ii<size_of_constraint;ii++) { 1845 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1846 } 1847 } 1848 } 1849 #else /* on missing GESVD */ 1850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1851 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1853 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1854 #if !defined(PETSC_USE_COMPLEX) 1855 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1856 #else 1857 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1858 #endif 1859 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1860 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1861 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1862 k = temp_constraints; 1863 if (k > size_of_constraint) k = size_of_constraint; 1864 j = 0; 1865 while (j < k && singular_vals[k-j-1] < tol) j++; 1866 valid_constraints = k-j; 1867 total_counts = total_counts-temp_constraints+valid_constraints; 1868 #endif /* on missing GESVD */ 1869 } 1870 } 1871 /* setting change_of_basis flag is safe now */ 1872 if (boolforchange) { 1873 for (j=0;j<valid_constraints;j++) { 1874 PetscBTSet(change_basis,total_counts-j-1); 1875 } 1876 } 1877 } 1878 /* free index sets of faces, edges and vertices */ 1879 for (i=0;i<n_ISForFaces;i++) { 1880 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1881 } 1882 if (n_ISForFaces) { 1883 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1884 } 1885 for (i=0;i<n_ISForEdges;i++) { 1886 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1887 } 1888 if (n_ISForEdges) { 1889 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1890 } 1891 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1892 /* map temp_indices_to_constraint in boundary numbering */ 1893 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1894 if (i != temp_indices[total_counts]) { 1895 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1896 } 1897 1898 /* free workspace */ 1899 if (!skip_lapack || pcbddc->use_qr_single) { 1900 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 1901 } 1902 if (!skip_lapack) { 1903 ierr = PetscFree(work);CHKERRQ(ierr); 1904 #if defined(PETSC_USE_COMPLEX) 1905 ierr = PetscFree(rwork);CHKERRQ(ierr); 1906 #endif 1907 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1908 #if defined(PETSC_MISSING_LAPACK_GESVD) 1909 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1910 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1911 #endif 1912 } 1913 for (k=0;k<nnsp_size;k++) { 1914 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1915 } 1916 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1917 1918 /* set quantities in pcbddc data structure and store previous primal size */ 1919 /* n_vertices defines the number of subdomain corners in the primal space */ 1920 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1921 olocal_primal_size = pcbddc->local_primal_size; 1922 pcbddc->local_primal_size = total_counts; 1923 pcbddc->n_vertices = n_vertices; 1924 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1925 1926 /* Create constraint matrix */ 1927 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1928 /* so we need to set it up properly either with or without change of basis */ 1929 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1930 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1931 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1932 /* array to compute a local numbering of constraints : vertices first then constraints */ 1933 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1934 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1935 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1936 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1937 /* auxiliary stuff for basis change */ 1938 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1939 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1940 1941 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1942 total_primal_vertices=0; 1943 for (i=0;i<pcbddc->local_primal_size;i++) { 1944 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1945 if (size_of_constraint == 1) { 1946 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1947 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1948 aux_primal_minloc[total_primal_vertices]=0; 1949 total_primal_vertices++; 1950 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1951 PetscInt min_loc,min_index; 1952 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1953 /* find first untouched local node */ 1954 k = 0; 1955 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1956 min_index = global_indices[k]; 1957 min_loc = k; 1958 /* search the minimum among global nodes already untouched on the cc */ 1959 for (k=1;k<size_of_constraint;k++) { 1960 /* there can be more than one constraint on a single connected component */ 1961 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1962 min_index = global_indices[k]; 1963 min_loc = k; 1964 } 1965 } 1966 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1967 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1968 aux_primal_minloc[total_primal_vertices]=min_loc; 1969 total_primal_vertices++; 1970 } 1971 } 1972 /* determine if a QR strategy is needed for change of basis */ 1973 qr_needed = PETSC_FALSE; 1974 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1975 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1976 if (PetscBTLookup(change_basis,i)) { 1977 if (!pcbddc->use_qr_single) { 1978 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1979 j = 0; 1980 for (k=0;k<size_of_constraint;k++) { 1981 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1982 j++; 1983 } 1984 } 1985 /* found more than one primal dof on the cc */ 1986 if (j > 1) { 1987 PetscBTSet(qr_needed_idx,i); 1988 qr_needed = PETSC_TRUE; 1989 } 1990 } else { 1991 PetscBTSet(qr_needed_idx,i); 1992 qr_needed = PETSC_TRUE; 1993 } 1994 } 1995 } 1996 /* free workspace */ 1997 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1998 1999 /* permute indices in order to have a sorted set of vertices */ 2000 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2001 2002 /* nonzero structure of constraint matrix */ 2003 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2004 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2005 j=total_primal_vertices; 2006 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2007 if (!PetscBTLookup(change_basis,i)) { 2008 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2009 j++; 2010 } 2011 } 2012 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2013 ierr = PetscFree(nnz);CHKERRQ(ierr); 2014 /* set values in constraint matrix */ 2015 for (i=0;i<total_primal_vertices;i++) { 2016 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2017 } 2018 total_counts = total_primal_vertices; 2019 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2020 if (!PetscBTLookup(change_basis,i)) { 2021 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2022 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2023 total_counts++; 2024 } 2025 } 2026 /* assembling */ 2027 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 /* 2030 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2031 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2032 */ 2033 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2034 if (pcbddc->use_change_of_basis) { 2035 /* dual and primal dofs on a single cc */ 2036 PetscInt dual_dofs,primal_dofs; 2037 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2038 PetscInt primal_counter; 2039 /* working stuff for GEQRF */ 2040 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2041 PetscBLASInt lqr_work; 2042 /* working stuff for UNGQR */ 2043 PetscScalar *gqr_work,lgqr_work_t; 2044 PetscBLASInt lgqr_work; 2045 /* working stuff for TRTRS */ 2046 PetscScalar *trs_rhs; 2047 PetscBLASInt Blas_NRHS; 2048 /* pointers for values insertion into change of basis matrix */ 2049 PetscInt *start_rows,*start_cols; 2050 PetscScalar *start_vals; 2051 /* working stuff for values insertion */ 2052 PetscBT is_primal; 2053 /* matrix sizes */ 2054 PetscInt global_size,local_size; 2055 /* work array for nonzeros */ 2056 PetscScalar *nnz_array; 2057 /* temporary change of basis */ 2058 Mat localChangeOfBasisMatrix; 2059 /* auxiliary work for global change of basis */ 2060 Vec nnz_vec; 2061 PetscInt *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz; 2062 PetscInt nvtxs,*xadj,*adjncy,*idxs_mapped; 2063 PetscScalar *vals; 2064 PetscBool done; 2065 2066 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2067 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2068 ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 2069 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 2070 2071 /* nonzeros for local mat */ 2072 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 2073 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 2074 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2075 if (PetscBTLookup(change_basis,i)) { 2076 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2077 if (PetscBTLookup(qr_needed_idx,i)) { 2078 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2079 } else { 2080 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 2081 /* get local primal index on the cc */ 2082 j = 0; 2083 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2084 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 2085 } 2086 } 2087 } 2088 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2089 /* Set initial identity in the matrix */ 2090 for (i=0;i<pcis->n_B;i++) { 2091 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2092 } 2093 2094 if (pcbddc->dbg_flag) { 2095 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2096 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2097 } 2098 2099 2100 /* Now we loop on the constraints which need a change of basis */ 2101 /* 2102 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2103 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2104 2105 Basic blocks of change of basis matrix T computed by 2106 2107 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2108 2109 | 1 0 ... 0 s_1/S | 2110 | 0 1 ... 0 s_2/S | 2111 | ... | 2112 | 0 ... 1 s_{n-1}/S | 2113 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2114 2115 with S = \sum_{i=1}^n s_i^2 2116 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2117 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2118 2119 - QR decomposition of constraints otherwise 2120 */ 2121 if (qr_needed) { 2122 /* space to store Q */ 2123 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2124 /* first we issue queries for optimal work */ 2125 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2126 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2127 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2128 lqr_work = -1; 2129 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2130 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2131 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2132 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2133 lgqr_work = -1; 2134 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2138 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2139 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2140 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2141 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2142 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2143 /* array to store scaling factors for reflectors */ 2144 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2145 /* array to store rhs and solution of triangular solver */ 2146 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2147 /* allocating workspace for check */ 2148 if (pcbddc->dbg_flag) { 2149 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 2150 } 2151 } 2152 /* array to store whether a node is primal or not */ 2153 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2154 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2155 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2156 if (i != total_primal_vertices) { 2157 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2158 } 2159 for (i=0;i<total_primal_vertices;i++) { 2160 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2161 } 2162 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2163 2164 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2165 /* -> using implicit ordering contained in temp_indices data */ 2166 total_counts = pcbddc->n_vertices; 2167 primal_counter = total_counts; 2168 while (total_counts<pcbddc->local_primal_size) { 2169 primal_dofs = 1; 2170 if (PetscBTLookup(change_basis,total_counts)) { 2171 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2172 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 2173 primal_dofs++; 2174 } 2175 /* get constraint info */ 2176 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2177 dual_dofs = size_of_constraint-primal_dofs; 2178 2179 if (pcbddc->dbg_flag) { 2180 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2181 } 2182 2183 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2184 2185 /* copy quadrature constraints for change of basis check */ 2186 if (pcbddc->dbg_flag) { 2187 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2188 } 2189 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2190 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2191 2192 /* compute QR decomposition of constraints */ 2193 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2194 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2195 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2197 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2198 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2199 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2200 2201 /* explictly compute R^-T */ 2202 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2203 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2204 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2205 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2206 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2207 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2208 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2209 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2210 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2211 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2212 2213 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2214 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2216 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2217 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2218 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2219 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2220 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2221 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2222 2223 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2224 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2225 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2226 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2227 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2228 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2229 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2230 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2231 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2232 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2233 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2234 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2235 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2236 2237 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2238 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2239 /* insert cols for primal dofs */ 2240 for (j=0;j<primal_dofs;j++) { 2241 start_vals = &qr_basis[j*size_of_constraint]; 2242 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2243 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2244 } 2245 /* insert cols for dual dofs */ 2246 for (j=0,k=0;j<dual_dofs;k++) { 2247 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2248 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2249 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2250 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2251 j++; 2252 } 2253 } 2254 2255 /* check change of basis */ 2256 if (pcbddc->dbg_flag) { 2257 PetscInt ii,jj; 2258 PetscBool valid_qr=PETSC_TRUE; 2259 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2260 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2261 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2262 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2263 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2264 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2265 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2266 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2267 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2268 for (jj=0;jj<size_of_constraint;jj++) { 2269 for (ii=0;ii<primal_dofs;ii++) { 2270 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2271 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2272 } 2273 } 2274 if (!valid_qr) { 2275 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2276 for (jj=0;jj<size_of_constraint;jj++) { 2277 for (ii=0;ii<primal_dofs;ii++) { 2278 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2280 } 2281 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2283 } 2284 } 2285 } 2286 } else { 2287 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2288 } 2289 } 2290 } else { /* simple transformation block */ 2291 PetscInt row,col; 2292 PetscScalar val,norm; 2293 2294 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2295 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2296 for (j=0;j<size_of_constraint;j++) { 2297 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2298 if (!PetscBTLookup(is_primal,row)) { 2299 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2300 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2301 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2302 } else { 2303 for (k=0;k<size_of_constraint;k++) { 2304 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2305 if (row != col) { 2306 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2307 } else { 2308 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2309 } 2310 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2311 } 2312 } 2313 } 2314 if (pcbddc->dbg_flag) { 2315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2316 } 2317 } 2318 /* increment primal counter */ 2319 primal_counter += primal_dofs; 2320 } else { 2321 if (pcbddc->dbg_flag) { 2322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2323 } 2324 } 2325 /* increment constraint counter total_counts */ 2326 total_counts += primal_dofs; 2327 } 2328 2329 /* free workspace */ 2330 if (qr_needed) { 2331 if (pcbddc->dbg_flag) { 2332 ierr = PetscFree(work);CHKERRQ(ierr); 2333 } 2334 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2335 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2336 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2337 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2338 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2339 } 2340 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2341 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 2344 /* assembling of global change of variable */ 2345 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2346 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2347 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2348 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2349 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2350 ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr); 2351 2352 /* nonzeros (overestimated) */ 2353 ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr); 2354 ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr); 2355 ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr); 2356 for (i=0;i<pcis->n;i++) { 2357 nnz_array[i] = 1.0; 2358 idxs_all[i] = i; 2359 } 2360 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2361 for (i=0;i<pcis->n_B;i++) { 2362 nnz_array[idxs_B[i]] = nnz[i]; 2363 } 2364 if (pcis->n) { 2365 ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr); 2366 } 2367 ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr); 2368 ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr); 2369 ierr = PetscFree(nnz);CHKERRQ(ierr); 2370 ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr); 2371 ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr); 2372 ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2373 for (i=0;i<local_size;i++) { 2374 d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size); 2375 o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size); 2376 } 2377 ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr); 2378 ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr); 2379 ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2380 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2381 2382 /* Set identity on dirichlet dofs */ 2383 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2384 for (i=0;i<pcis->n-pcis->n_B;i++) { 2385 PetscScalar one=1.0; 2386 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr); 2387 } 2388 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr); 2389 2390 /* Set values at interface dofs */ 2391 done = PETSC_TRUE; 2392 ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2393 if (!done) { 2394 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2395 } 2396 ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2397 ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr); 2398 ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr); 2399 for (i=0;i<nvtxs;i++) { 2400 PetscInt row,*cols,ncols; 2401 PetscScalar *mat_vals; 2402 2403 row = idxs_B[i]; 2404 ncols = xadj[i+1]-xadj[i]; 2405 cols = idxs_mapped+xadj[i]; 2406 mat_vals = vals+xadj[i]; 2407 ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr); 2408 } 2409 ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr); 2410 if (!done) { 2411 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2412 } 2413 ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr); 2414 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr); 2415 ierr = PetscFree(idxs_mapped);CHKERRQ(ierr); 2416 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2417 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2418 2419 /* check */ 2420 if (pcbddc->dbg_flag) { 2421 PetscReal error; 2422 Vec x,x_change; 2423 2424 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2425 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2426 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2427 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2428 ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2429 ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2430 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 2431 ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2432 ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2433 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2434 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2435 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2436 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr); 2438 ierr = VecDestroy(&x);CHKERRQ(ierr); 2439 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2440 } 2441 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2442 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2443 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2444 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2445 } 2446 2447 /* set up change of basis context */ 2448 if (pcbddc->ChangeOfBasisMatrix) { 2449 PCBDDCChange_ctx change_ctx; 2450 2451 if (!pcbddc->new_global_mat) { 2452 PetscInt global_size,local_size; 2453 2454 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2455 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2456 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2457 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2458 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2459 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2460 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2461 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2462 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2463 } else { 2464 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2465 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2466 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2467 } 2468 if (!pcbddc->user_ChangeOfBasisMatrix) { 2469 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2470 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2471 } else { 2472 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2473 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2474 } 2475 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2476 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2477 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2478 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2479 } 2480 2481 /* get indices in local ordering for vertices and constraints */ 2482 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2483 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2484 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2485 } 2486 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2487 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2488 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2489 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2490 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2491 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2492 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2493 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2494 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2495 /* set quantities in PCBDDC data struct */ 2496 pcbddc->n_actual_vertices = i; 2497 /* check if a new primal space has been introduced */ 2498 pcbddc->new_primal_space_local = PETSC_TRUE; 2499 if (olocal_primal_size == pcbddc->local_primal_size) { 2500 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2501 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2502 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2503 } 2504 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2505 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2506 2507 /* flush dbg viewer */ 2508 if (pcbddc->dbg_flag) { 2509 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2510 } 2511 2512 /* free workspace */ 2513 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2514 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2515 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2516 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2517 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2518 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2524 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2525 { 2526 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2527 PC_IS *pcis = (PC_IS*)pc->data; 2528 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2529 PetscInt ierr,i,vertex_size; 2530 PetscViewer viewer=pcbddc->dbg_viewer; 2531 2532 PetscFunctionBegin; 2533 /* Reset previously computed graph */ 2534 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2535 /* Init local Graph struct */ 2536 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2537 2538 /* Check validity of the csr graph passed in by the user */ 2539 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2540 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2541 } 2542 2543 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2544 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2545 Mat mat_adj; 2546 PetscInt *xadj,*adjncy; 2547 PetscInt nvtxs; 2548 PetscBool flg_row=PETSC_TRUE; 2549 2550 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2551 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2552 if (!flg_row) { 2553 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2554 } 2555 if (pcbddc->use_local_adj) { 2556 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2557 pcbddc->deluxe_compute_rowadj = PETSC_FALSE; 2558 } else { /* just compute subdomain's connected components */ 2559 IS is_dummy; 2560 ISLocalToGlobalMapping l2gmap_dummy; 2561 PetscInt j,sum; 2562 PetscInt *cxadj,*cadjncy; 2563 const PetscInt *idxs; 2564 PCBDDCGraph graph; 2565 PetscBT is_on_boundary; 2566 2567 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2568 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2569 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2570 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2571 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2572 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2573 graph->xadj = xadj; 2574 graph->adjncy = adjncy; 2575 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2576 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2577 2578 if (pcbddc->dbg_flag) { 2579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2580 for (i=0;i<graph->ncc;i++) { 2581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2582 } 2583 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2584 } 2585 2586 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2587 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2588 for (i=0;i<pcis->n_B;i++) { 2589 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2590 } 2591 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2592 2593 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2594 sum = 0; 2595 for (i=0;i<graph->ncc;i++) { 2596 PetscInt sizecc = 0; 2597 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2598 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2599 sizecc++; 2600 } 2601 } 2602 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2603 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2604 cxadj[graph->queue[j]] = sizecc; 2605 } 2606 } 2607 sum += sizecc*sizecc; 2608 } 2609 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2610 sum = 0; 2611 for (i=0;i<nvtxs;i++) { 2612 PetscInt temp = cxadj[i]; 2613 cxadj[i] = sum; 2614 sum += temp; 2615 } 2616 cxadj[nvtxs] = sum; 2617 for (i=0;i<graph->ncc;i++) { 2618 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2619 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2620 PetscInt k,sizecc = 0; 2621 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2622 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2623 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2624 sizecc++; 2625 } 2626 } 2627 } 2628 } 2629 } 2630 if (nvtxs) { 2631 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2632 } else { 2633 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2634 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2635 } 2636 graph->xadj = 0; 2637 graph->adjncy = 0; 2638 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2639 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2640 } 2641 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2642 if (!flg_row) { 2643 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2644 } 2645 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2646 } 2647 2648 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2649 vertex_size = 1; 2650 if (pcbddc->user_provided_isfordofs) { 2651 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2652 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2653 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2654 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2655 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2656 } 2657 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2658 pcbddc->n_ISForDofs = 0; 2659 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2660 } 2661 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2662 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2663 } else { 2664 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2665 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2666 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2667 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2668 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2669 } 2670 } 2671 } 2672 2673 /* Setup of Graph */ 2674 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2675 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2676 } 2677 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2678 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2679 } 2680 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2681 2682 /* Graph's connected components analysis */ 2683 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2684 2685 /* print some info to stdout */ 2686 if (pcbddc->dbg_flag) { 2687 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2688 } 2689 2690 /* mark topography has done */ 2691 pcbddc->recompute_topography = PETSC_FALSE; 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2697 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2698 { 2699 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2700 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2701 PetscErrorCode ierr; 2702 2703 PetscFunctionBegin; 2704 n = 0; 2705 vertices = 0; 2706 if (pcbddc->ConstraintMatrix) { 2707 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2708 for (i=0;i<local_primal_size;i++) { 2709 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2710 if (size_of_constraint == 1) n++; 2711 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2712 } 2713 if (vertices_idx) { 2714 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2715 n = 0; 2716 for (i=0;i<local_primal_size;i++) { 2717 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2718 if (size_of_constraint == 1) { 2719 vertices[n++]=row_cmat_indices[0]; 2720 } 2721 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2722 } 2723 } 2724 } 2725 *n_vertices = n; 2726 if (vertices_idx) *vertices_idx = vertices; 2727 PetscFunctionReturn(0); 2728 } 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2732 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2733 { 2734 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2735 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2736 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2737 PetscBT touched; 2738 PetscErrorCode ierr; 2739 2740 /* This function assumes that the number of local constraints per connected component 2741 is not greater than the number of nodes defined for the connected component 2742 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2743 PetscFunctionBegin; 2744 n = 0; 2745 constraints_index = 0; 2746 if (pcbddc->ConstraintMatrix) { 2747 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2748 max_size_of_constraint = 0; 2749 for (i=0;i<local_primal_size;i++) { 2750 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2751 if (size_of_constraint > 1) { 2752 n++; 2753 } 2754 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2755 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2756 } 2757 if (constraints_idx) { 2758 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2759 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2760 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2761 n = 0; 2762 for (i=0;i<local_primal_size;i++) { 2763 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2764 if (size_of_constraint > 1) { 2765 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2766 /* find first untouched local node */ 2767 j = 0; 2768 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2769 min_index = row_cmat_global_indices[j]; 2770 min_loc = j; 2771 /* search the minimum among nodes not yet touched on the connected component 2772 since there can be more than one constraint on a single cc */ 2773 for (j=1;j<size_of_constraint;j++) { 2774 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2775 min_index = row_cmat_global_indices[j]; 2776 min_loc = j; 2777 } 2778 } 2779 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2780 constraints_index[n++] = row_cmat_indices[min_loc]; 2781 } 2782 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2783 } 2784 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2785 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2786 } 2787 } 2788 *n_constraints = n; 2789 if (constraints_idx) *constraints_idx = constraints_index; 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #undef __FUNCT__ 2794 #define __FUNCT__ "PCBDDCSubsetNumbering" 2795 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2796 { 2797 Vec local_vec,global_vec; 2798 IS seqis,paris; 2799 VecScatter scatter_ctx; 2800 PetscScalar *array; 2801 PetscInt *temp_global_dofs; 2802 PetscScalar globalsum; 2803 PetscInt i,j,s; 2804 PetscInt nlocals,first_index,old_index,max_local; 2805 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2806 PetscMPIInt *dof_sizes,*dof_displs; 2807 PetscBool first_found; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 /* mpi buffers */ 2812 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 2813 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 2814 j = ( !rank_prec_comm ? size_prec_comm : 0); 2815 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2816 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2817 /* get maximum size of subset */ 2818 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2819 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2820 max_local = 0; 2821 for (i=0;i<n_local_dofs;i++) { 2822 if (max_local < temp_global_dofs[i] ) { 2823 max_local = temp_global_dofs[i]; 2824 } 2825 } 2826 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 2827 max_global++; 2828 max_local = 0; 2829 for (i=0;i<n_local_dofs;i++) { 2830 if (max_local < local_dofs[i] ) { 2831 max_local = local_dofs[i]; 2832 } 2833 } 2834 max_local++; 2835 /* allocate workspace */ 2836 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2837 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2838 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2839 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2840 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2841 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2842 /* create scatter */ 2843 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2844 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2845 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2846 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2847 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2848 /* init array */ 2849 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2850 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2851 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2852 if (local_dofs_mult) { 2853 for (i=0;i<n_local_dofs;i++) { 2854 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2855 } 2856 } else { 2857 for (i=0;i<n_local_dofs;i++) { 2858 array[local_dofs[i]]=1.0; 2859 } 2860 } 2861 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2862 /* scatter into global vec and get total number of global dofs */ 2863 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2865 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2866 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2867 /* Fill global_vec with cumulative function for global numbering */ 2868 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2869 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2870 nlocals = 0; 2871 first_index = -1; 2872 first_found = PETSC_FALSE; 2873 for (i=0;i<s;i++) { 2874 if (!first_found && PetscRealPart(array[i]) > 0.1) { 2875 first_found = PETSC_TRUE; 2876 first_index = i; 2877 } 2878 nlocals += (PetscInt)PetscRealPart(array[i]); 2879 } 2880 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2881 if (!rank_prec_comm) { 2882 dof_displs[0]=0; 2883 for (i=1;i<size_prec_comm;i++) { 2884 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2885 } 2886 } 2887 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2888 if (first_found) { 2889 array[first_index] += (PetscScalar)nlocals; 2890 old_index = first_index; 2891 for (i=first_index+1;i<s;i++) { 2892 if (PetscRealPart(array[i]) > 0.1) { 2893 array[i] += array[old_index]; 2894 old_index = i; 2895 } 2896 } 2897 } 2898 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2899 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2900 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2901 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2902 /* get global ordering of local dofs */ 2903 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2904 if (local_dofs_mult) { 2905 for (i=0;i<n_local_dofs;i++) { 2906 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2907 } 2908 } else { 2909 for (i=0;i<n_local_dofs;i++) { 2910 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2911 } 2912 } 2913 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2914 /* free workspace */ 2915 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2916 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2917 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2918 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2919 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2920 /* return pointer to global ordering of local dofs */ 2921 *global_numbering_subset = temp_global_dofs; 2922 PetscFunctionReturn(0); 2923 } 2924 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2927 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2928 { 2929 PetscInt i,j; 2930 PetscScalar *alphas; 2931 PetscErrorCode ierr; 2932 2933 PetscFunctionBegin; 2934 /* this implements stabilized Gram-Schmidt */ 2935 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2936 for (i=0;i<n;i++) { 2937 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2938 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2939 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2940 } 2941 ierr = PetscFree(alphas);CHKERRQ(ierr); 2942 PetscFunctionReturn(0); 2943 } 2944 2945 #undef __FUNCT__ 2946 #define __FUNCT__ "MatISGetSubassemblingPattern" 2947 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2948 { 2949 Mat subdomain_adj; 2950 IS new_ranks,ranks_send_to; 2951 MatPartitioning partitioner; 2952 Mat_IS *matis; 2953 PetscInt n_neighs,*neighs,*n_shared,**shared; 2954 PetscInt prank; 2955 PetscMPIInt size,rank,color; 2956 PetscInt *xadj,*adjncy,*oldranks; 2957 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2958 PetscInt i,local_size,threshold=0; 2959 PetscErrorCode ierr; 2960 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2961 PetscSubcomm subcomm; 2962 2963 PetscFunctionBegin; 2964 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2965 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2966 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2967 2968 /* Get info on mapping */ 2969 matis = (Mat_IS*)(mat->data); 2970 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2971 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2972 2973 /* build local CSR graph of subdomains' connectivity */ 2974 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2975 xadj[0] = 0; 2976 xadj[1] = PetscMax(n_neighs-1,0); 2977 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2978 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2979 2980 if (threshold) { 2981 PetscInt xadj_count = 0; 2982 for (i=1;i<n_neighs;i++) { 2983 if (n_shared[i] > threshold) { 2984 adjncy[xadj_count] = neighs[i]; 2985 adjncy_wgt[xadj_count] = n_shared[i]; 2986 xadj_count++; 2987 } 2988 } 2989 xadj[1] = xadj_count; 2990 } else { 2991 if (xadj[1]) { 2992 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2993 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2994 } 2995 } 2996 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2997 if (use_square) { 2998 for (i=0;i<xadj[1];i++) { 2999 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3000 } 3001 } 3002 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3003 3004 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3005 3006 /* 3007 Restrict work on active processes only. 3008 */ 3009 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3010 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3011 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3012 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3013 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3014 if (color) { 3015 ierr = PetscFree(xadj);CHKERRQ(ierr); 3016 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3017 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3018 } else { 3019 PetscInt coarsening_ratio; 3020 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 3021 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3022 prank = rank; 3023 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 3024 /* 3025 for (i=0;i<size;i++) { 3026 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3027 } 3028 */ 3029 for (i=0;i<xadj[1];i++) { 3030 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3031 } 3032 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3033 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3034 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3035 3036 /* Partition */ 3037 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 3038 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3039 if (use_vwgt) { 3040 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3041 v_wgt[0] = local_size; 3042 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3043 } 3044 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3045 coarsening_ratio = size/n_subdomains; 3046 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3047 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3048 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3049 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3050 3051 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3052 if (contiguous) { 3053 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3054 } else { 3055 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3056 } 3057 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3058 /* clean up */ 3059 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3060 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3061 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3062 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3063 } 3064 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3065 3066 /* assemble parallel IS for sends */ 3067 i = 1; 3068 if (color) i=0; 3069 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3070 3071 /* get back IS */ 3072 *is_sends = ranks_send_to; 3073 PetscFunctionReturn(0); 3074 } 3075 3076 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3077 3078 #undef __FUNCT__ 3079 #define __FUNCT__ "MatISSubassemble" 3080 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3081 { 3082 Mat local_mat; 3083 Mat_IS *matis; 3084 IS is_sends_internal; 3085 PetscInt rows,cols,new_local_rows; 3086 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3087 PetscBool ismatis,isdense,newisdense,destroy_mat; 3088 ISLocalToGlobalMapping l2gmap; 3089 PetscInt* l2gmap_indices; 3090 const PetscInt* is_indices; 3091 MatType new_local_type; 3092 /* buffers */ 3093 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3094 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3095 PetscInt *recv_buffer_idxs_local; 3096 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3097 /* MPI */ 3098 MPI_Comm comm,comm_n; 3099 PetscSubcomm subcomm; 3100 PetscMPIInt n_sends,n_recvs,commsize; 3101 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3102 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3103 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3104 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3105 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3106 PetscErrorCode ierr; 3107 3108 PetscFunctionBegin; 3109 /* TODO: add missing checks */ 3110 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3111 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3112 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3113 PetscValidLogicalCollectiveInt(mat,nis,7); 3114 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3115 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3116 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3117 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3118 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3119 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3120 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3121 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3122 PetscInt mrows,mcols,mnrows,mncols; 3123 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3124 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3125 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3126 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3127 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3128 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3129 } 3130 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3131 PetscValidLogicalCollectiveInt(mat,bs,0); 3132 /* prepare IS for sending if not provided */ 3133 if (!is_sends) { 3134 PetscBool pcontig = PETSC_TRUE; 3135 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3136 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3137 } else { 3138 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3139 is_sends_internal = is_sends; 3140 } 3141 3142 /* get pointer of MATIS data */ 3143 matis = (Mat_IS*)mat->data; 3144 3145 /* get comm */ 3146 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3147 3148 /* compute number of sends */ 3149 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3150 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3151 3152 /* compute number of receives */ 3153 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3154 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3155 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3156 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3157 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3158 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3159 ierr = PetscFree(iflags);CHKERRQ(ierr); 3160 3161 /* restrict comm if requested */ 3162 subcomm = 0; 3163 destroy_mat = PETSC_FALSE; 3164 if (restrict_comm) { 3165 PetscMPIInt color,rank,subcommsize; 3166 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3167 color = 0; 3168 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3169 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3170 subcommsize = commsize - subcommsize; 3171 /* check if reuse has been requested */ 3172 if (reuse == MAT_REUSE_MATRIX) { 3173 if (*mat_n) { 3174 PetscMPIInt subcommsize2; 3175 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3176 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3177 comm_n = PetscObjectComm((PetscObject)*mat_n); 3178 } else { 3179 comm_n = PETSC_COMM_SELF; 3180 } 3181 } else { /* MAT_INITIAL_MATRIX */ 3182 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3183 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3184 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3185 comm_n = subcomm->comm; 3186 } 3187 /* flag to destroy *mat_n if not significative */ 3188 if (color) destroy_mat = PETSC_TRUE; 3189 } else { 3190 comm_n = comm; 3191 } 3192 3193 /* prepare send/receive buffers */ 3194 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3195 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3196 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3197 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3198 if (nis) { 3199 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3200 } 3201 3202 /* Get data from local matrices */ 3203 if (!isdense) { 3204 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3205 /* TODO: See below some guidelines on how to prepare the local buffers */ 3206 /* 3207 send_buffer_vals should contain the raw values of the local matrix 3208 send_buffer_idxs should contain: 3209 - MatType_PRIVATE type 3210 - PetscInt size_of_l2gmap 3211 - PetscInt global_row_indices[size_of_l2gmap] 3212 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3213 */ 3214 } else { 3215 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3216 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3217 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3218 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3219 send_buffer_idxs[1] = i; 3220 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3221 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3222 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3223 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3224 for (i=0;i<n_sends;i++) { 3225 ilengths_vals[is_indices[i]] = len*len; 3226 ilengths_idxs[is_indices[i]] = len+2; 3227 } 3228 } 3229 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3230 /* additional is (if any) */ 3231 if (nis) { 3232 PetscMPIInt psum; 3233 PetscInt j; 3234 for (j=0,psum=0;j<nis;j++) { 3235 PetscInt plen; 3236 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3237 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3238 psum += len+1; /* indices + lenght */ 3239 } 3240 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3241 for (j=0,psum=0;j<nis;j++) { 3242 PetscInt plen; 3243 const PetscInt *is_array_idxs; 3244 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3245 send_buffer_idxs_is[psum] = plen; 3246 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3247 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3248 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3249 psum += plen+1; /* indices + lenght */ 3250 } 3251 for (i=0;i<n_sends;i++) { 3252 ilengths_idxs_is[is_indices[i]] = psum; 3253 } 3254 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3255 } 3256 3257 buf_size_idxs = 0; 3258 buf_size_vals = 0; 3259 buf_size_idxs_is = 0; 3260 for (i=0;i<n_recvs;i++) { 3261 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3262 buf_size_vals += (PetscInt)olengths_vals[i]; 3263 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3264 } 3265 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3266 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3267 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3268 3269 /* get new tags for clean communications */ 3270 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3271 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3272 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3273 3274 /* allocate for requests */ 3275 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3276 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3277 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3278 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3279 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3280 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3281 3282 /* communications */ 3283 ptr_idxs = recv_buffer_idxs; 3284 ptr_vals = recv_buffer_vals; 3285 ptr_idxs_is = recv_buffer_idxs_is; 3286 for (i=0;i<n_recvs;i++) { 3287 source_dest = onodes[i]; 3288 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3289 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3290 ptr_idxs += olengths_idxs[i]; 3291 ptr_vals += olengths_vals[i]; 3292 if (nis) { 3293 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3294 ptr_idxs_is += olengths_idxs_is[i]; 3295 } 3296 } 3297 for (i=0;i<n_sends;i++) { 3298 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3299 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3300 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3301 if (nis) { 3302 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3303 } 3304 } 3305 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3306 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3307 3308 /* assemble new l2g map */ 3309 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3310 ptr_idxs = recv_buffer_idxs; 3311 new_local_rows = 0; 3312 for (i=0;i<n_recvs;i++) { 3313 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3314 ptr_idxs += olengths_idxs[i]; 3315 } 3316 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3317 ptr_idxs = recv_buffer_idxs; 3318 new_local_rows = 0; 3319 for (i=0;i<n_recvs;i++) { 3320 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3321 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3322 ptr_idxs += olengths_idxs[i]; 3323 } 3324 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3325 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3326 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3327 3328 /* infer new local matrix type from received local matrices type */ 3329 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3330 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3331 if (n_recvs) { 3332 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3333 ptr_idxs = recv_buffer_idxs; 3334 for (i=0;i<n_recvs;i++) { 3335 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3336 new_local_type_private = MATAIJ_PRIVATE; 3337 break; 3338 } 3339 ptr_idxs += olengths_idxs[i]; 3340 } 3341 switch (new_local_type_private) { 3342 case MATDENSE_PRIVATE: 3343 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3344 new_local_type = MATSEQAIJ; 3345 bs = 1; 3346 } else { /* if I receive only 1 dense matrix */ 3347 new_local_type = MATSEQDENSE; 3348 bs = 1; 3349 } 3350 break; 3351 case MATAIJ_PRIVATE: 3352 new_local_type = MATSEQAIJ; 3353 bs = 1; 3354 break; 3355 case MATBAIJ_PRIVATE: 3356 new_local_type = MATSEQBAIJ; 3357 break; 3358 case MATSBAIJ_PRIVATE: 3359 new_local_type = MATSEQSBAIJ; 3360 break; 3361 default: 3362 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3363 break; 3364 } 3365 } else { /* by default, new_local_type is seqdense */ 3366 new_local_type = MATSEQDENSE; 3367 bs = 1; 3368 } 3369 3370 /* create MATIS object if needed */ 3371 if (reuse == MAT_INITIAL_MATRIX) { 3372 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3373 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3374 } else { 3375 /* it also destroys the local matrices */ 3376 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3377 } 3378 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3379 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3380 3381 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3382 3383 /* Global to local map of received indices */ 3384 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3385 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3386 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3387 3388 /* restore attributes -> type of incoming data and its size */ 3389 buf_size_idxs = 0; 3390 for (i=0;i<n_recvs;i++) { 3391 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3392 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3393 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3394 } 3395 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3396 3397 /* set preallocation */ 3398 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3399 if (!newisdense) { 3400 PetscInt *new_local_nnz=0; 3401 3402 ptr_vals = recv_buffer_vals; 3403 ptr_idxs = recv_buffer_idxs_local; 3404 if (n_recvs) { 3405 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3406 } 3407 for (i=0;i<n_recvs;i++) { 3408 PetscInt j; 3409 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3410 for (j=0;j<*(ptr_idxs+1);j++) { 3411 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3412 } 3413 } else { 3414 /* TODO */ 3415 } 3416 ptr_idxs += olengths_idxs[i]; 3417 } 3418 if (new_local_nnz) { 3419 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3420 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3421 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3422 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3423 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3424 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3425 } else { 3426 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3427 } 3428 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3429 } else { 3430 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3431 } 3432 3433 /* set values */ 3434 ptr_vals = recv_buffer_vals; 3435 ptr_idxs = recv_buffer_idxs_local; 3436 for (i=0;i<n_recvs;i++) { 3437 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3438 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3439 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3440 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3441 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3442 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3443 } else { 3444 /* TODO */ 3445 } 3446 ptr_idxs += olengths_idxs[i]; 3447 ptr_vals += olengths_vals[i]; 3448 } 3449 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3450 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3451 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3452 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3453 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3454 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3455 3456 #if 0 3457 if (!restrict_comm) { /* check */ 3458 Vec lvec,rvec; 3459 PetscReal infty_error; 3460 3461 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3462 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3463 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3464 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3465 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3466 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3467 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3468 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3469 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3470 } 3471 #endif 3472 3473 /* assemble new additional is (if any) */ 3474 if (nis) { 3475 PetscInt **temp_idxs,*count_is,j,psum; 3476 3477 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3478 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3479 ptr_idxs = recv_buffer_idxs_is; 3480 psum = 0; 3481 for (i=0;i<n_recvs;i++) { 3482 for (j=0;j<nis;j++) { 3483 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3484 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3485 psum += plen; 3486 ptr_idxs += plen+1; /* shift pointer to received data */ 3487 } 3488 } 3489 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3490 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3491 for (i=1;i<nis;i++) { 3492 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3493 } 3494 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3495 ptr_idxs = recv_buffer_idxs_is; 3496 for (i=0;i<n_recvs;i++) { 3497 for (j=0;j<nis;j++) { 3498 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3499 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3500 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3501 ptr_idxs += plen+1; /* shift pointer to received data */ 3502 } 3503 } 3504 for (i=0;i<nis;i++) { 3505 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3506 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3507 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3508 } 3509 ierr = PetscFree(count_is);CHKERRQ(ierr); 3510 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3511 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3512 } 3513 /* free workspace */ 3514 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3515 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3516 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3517 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3518 if (isdense) { 3519 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3520 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3521 } else { 3522 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3523 } 3524 if (nis) { 3525 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3526 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3527 } 3528 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3529 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3530 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3531 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3532 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3533 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3534 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3535 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3536 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3537 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3538 ierr = PetscFree(onodes);CHKERRQ(ierr); 3539 if (nis) { 3540 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3541 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3542 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3543 } 3544 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3545 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3546 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3547 for (i=0;i<nis;i++) { 3548 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3549 } 3550 } 3551 PetscFunctionReturn(0); 3552 } 3553 3554 /* temporary hack into ksp private data structure */ 3555 #include <petsc-private/kspimpl.h> 3556 3557 #undef __FUNCT__ 3558 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3559 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3560 { 3561 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3562 PC_IS *pcis = (PC_IS*)pc->data; 3563 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3564 MatNullSpace CoarseNullSpace=NULL; 3565 ISLocalToGlobalMapping coarse_islg; 3566 IS coarse_is,*isarray; 3567 PetscInt i,im_active=-1,active_procs=-1; 3568 PetscInt nis,nisdofs,nisneu; 3569 PC pc_temp; 3570 PCType coarse_pc_type; 3571 KSPType coarse_ksp_type; 3572 PetscBool multilevel_requested,multilevel_allowed; 3573 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3574 Mat t_coarse_mat_is; 3575 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3576 PetscMPIInt all_procs; 3577 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3578 PetscBool compute_vecs = PETSC_FALSE; 3579 PetscScalar *array; 3580 PetscErrorCode ierr; 3581 3582 PetscFunctionBegin; 3583 /* Assign global numbering to coarse dofs */ 3584 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3585 compute_vecs = PETSC_TRUE; 3586 PetscInt ocoarse_size; 3587 ocoarse_size = pcbddc->coarse_size; 3588 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3589 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3590 /* see if we can avoid some work */ 3591 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3592 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3593 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3594 coarse_reuse = PETSC_FALSE; 3595 } else { /* we can safely reuse already computed coarse matrix */ 3596 coarse_reuse = PETSC_TRUE; 3597 } 3598 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3599 coarse_reuse = PETSC_FALSE; 3600 } 3601 /* reset any subassembling information */ 3602 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3603 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3604 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3605 coarse_reuse = PETSC_TRUE; 3606 } 3607 3608 /* count "active" (i.e. with positive local size) and "void" processes */ 3609 im_active = !!(pcis->n); 3610 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3611 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3612 void_procs = all_procs-active_procs; 3613 csin_type_simple = PETSC_TRUE; 3614 redist = PETSC_FALSE; 3615 if (pcbddc->current_level && void_procs) { 3616 csin_ml = PETSC_TRUE; 3617 ncoarse_ml = void_procs; 3618 csin_ds = PETSC_TRUE; 3619 ncoarse_ds = void_procs; 3620 } else { 3621 csin_ml = PETSC_FALSE; 3622 ncoarse_ml = all_procs; 3623 if (void_procs) { 3624 csin_ds = PETSC_TRUE; 3625 ncoarse_ds = void_procs; 3626 csin_type_simple = PETSC_FALSE; 3627 } else { 3628 if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) { 3629 csin_ds = PETSC_TRUE; 3630 ncoarse_ds = pcbddc->redistribute_coarse; 3631 redist = PETSC_TRUE; 3632 } else { 3633 csin_ds = PETSC_FALSE; 3634 ncoarse_ds = all_procs; 3635 } 3636 } 3637 } 3638 3639 /* 3640 test if we can go multilevel: three conditions must be satisfied: 3641 - we have not exceeded the number of levels requested 3642 - we can actually subassemble the active processes 3643 - we can find a suitable number of MPI processes where we can place the subassembled problem 3644 */ 3645 multilevel_allowed = PETSC_FALSE; 3646 multilevel_requested = PETSC_FALSE; 3647 if (pcbddc->current_level < pcbddc->max_levels) { 3648 multilevel_requested = PETSC_TRUE; 3649 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3650 multilevel_allowed = PETSC_FALSE; 3651 } else { 3652 multilevel_allowed = PETSC_TRUE; 3653 } 3654 } 3655 /* determine number of process partecipating to coarse solver */ 3656 if (multilevel_allowed) { 3657 ncoarse = ncoarse_ml; 3658 csin = csin_ml; 3659 } else { 3660 ncoarse = ncoarse_ds; 3661 csin = csin_ds; 3662 } 3663 3664 /* creates temporary l2gmap and IS for coarse indexes */ 3665 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3666 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3667 3668 /* creates temporary MATIS object for coarse matrix */ 3669 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3670 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3671 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3672 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3673 #if 0 3674 { 3675 PetscViewer viewer; 3676 char filename[256]; 3677 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3678 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3679 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3680 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3681 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3682 } 3683 #endif 3684 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3685 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3686 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3687 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3688 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3689 3690 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3691 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3692 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3693 const PetscInt *idxs; 3694 ISLocalToGlobalMapping tmap; 3695 3696 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3697 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3698 /* allocate space for temporary storage */ 3699 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3700 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3701 /* allocate for IS array */ 3702 nisdofs = pcbddc->n_ISForDofsLocal; 3703 nisneu = !!pcbddc->NeumannBoundariesLocal; 3704 nis = nisdofs + nisneu; 3705 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3706 /* dofs splitting */ 3707 for (i=0;i<nisdofs;i++) { 3708 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3709 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3710 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3711 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3712 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3713 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3714 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3715 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3716 } 3717 /* neumann boundaries */ 3718 if (pcbddc->NeumannBoundariesLocal) { 3719 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3720 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3721 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3722 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3723 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3724 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3725 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3726 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3727 } 3728 /* free memory */ 3729 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3730 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3731 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3732 } else { 3733 nis = 0; 3734 nisdofs = 0; 3735 nisneu = 0; 3736 isarray = NULL; 3737 } 3738 /* destroy no longer needed map */ 3739 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3740 3741 /* restrict on coarse candidates (if needed) */ 3742 coarse_mat_is = NULL; 3743 if (csin) { 3744 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3745 if (redist) { 3746 PetscMPIInt rank; 3747 PetscInt spc,n_spc_p1,dest[1]; 3748 3749 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3750 spc = all_procs/pcbddc->redistribute_coarse; 3751 n_spc_p1 = all_procs%pcbddc->redistribute_coarse; 3752 if (rank > n_spc_p1*(spc+1)-1) { 3753 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3754 } else { 3755 dest[0] = rank/(spc+1); 3756 } 3757 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3758 } else { 3759 PetscInt j,tissize,*nisindices; 3760 PetscInt *coarse_candidates; 3761 const PetscInt* tisindices; 3762 /* get coarse candidates' ranks in pc communicator */ 3763 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3764 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3765 for (i=0,j=0;i<all_procs;i++) { 3766 if (!coarse_candidates[i]) { 3767 coarse_candidates[j]=i; 3768 j++; 3769 } 3770 } 3771 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3772 /* get a suitable subassembling pattern */ 3773 if (csin_type_simple) { 3774 PetscMPIInt rank; 3775 PetscInt issize,isidx; 3776 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3777 if (im_active) { 3778 issize = 1; 3779 isidx = (PetscInt)rank; 3780 } else { 3781 issize = 0; 3782 isidx = -1; 3783 } 3784 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3785 } else { 3786 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3787 } 3788 if (pcbddc->dbg_flag) { 3789 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3790 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3791 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3792 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3793 for (i=0;i<j;i++) { 3794 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3795 } 3796 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3797 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3798 } 3799 /* shift the pattern on coarse candidates */ 3800 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3801 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3802 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 3803 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3804 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3805 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3806 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3807 } 3808 } 3809 if (pcbddc->dbg_flag) { 3810 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3811 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3812 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3813 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3814 } 3815 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3816 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3817 } else { 3818 if (pcbddc->dbg_flag) { 3819 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3820 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3821 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3822 } 3823 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3824 coarse_mat_is = t_coarse_mat_is; 3825 } 3826 3827 /* create local to global scatters for coarse problem */ 3828 if (compute_vecs) { 3829 PetscInt lrows; 3830 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3831 if (coarse_mat_is) { 3832 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3833 } else { 3834 lrows = 0; 3835 } 3836 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3837 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3838 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3839 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3840 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3841 } 3842 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3843 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3844 3845 /* set defaults for coarse KSP and PC */ 3846 if (multilevel_allowed) { 3847 coarse_ksp_type = KSPRICHARDSON; 3848 coarse_pc_type = PCBDDC; 3849 } else { 3850 coarse_ksp_type = KSPPREONLY; 3851 coarse_pc_type = PCREDUNDANT; 3852 } 3853 3854 /* print some info if requested */ 3855 if (pcbddc->dbg_flag) { 3856 if (!multilevel_allowed) { 3857 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3858 if (multilevel_requested) { 3859 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3860 } else if (pcbddc->max_levels) { 3861 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3862 } 3863 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3864 } 3865 } 3866 3867 /* create the coarse KSP object only once with defaults */ 3868 if (coarse_mat_is) { 3869 MatReuse coarse_mat_reuse; 3870 PetscViewer dbg_viewer = NULL; 3871 if (pcbddc->dbg_flag) { 3872 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3873 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3874 } 3875 if (!pcbddc->coarse_ksp) { 3876 char prefix[256],str_level[16]; 3877 size_t len; 3878 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3879 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3880 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3881 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 3882 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3883 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3884 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3885 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3886 /* prefix */ 3887 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3888 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3889 if (!pcbddc->current_level) { 3890 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3891 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3892 } else { 3893 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3894 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3895 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3896 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 3897 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3898 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3899 } 3900 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3901 /* allow user customization */ 3902 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3903 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 3904 } 3905 3906 /* get some info after set from options */ 3907 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3908 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3909 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3910 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 3911 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3912 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3913 isbddc = PETSC_FALSE; 3914 } 3915 if (isredundant) { 3916 KSP inner_ksp; 3917 PC inner_pc; 3918 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 3919 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 3920 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 3921 } 3922 3923 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3924 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3925 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3926 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3927 if (nisdofs) { 3928 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3929 for (i=0;i<nisdofs;i++) { 3930 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3931 } 3932 } 3933 if (nisneu) { 3934 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3935 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3936 } 3937 3938 /* assemble coarse matrix */ 3939 if (coarse_reuse) { 3940 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 3941 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3942 coarse_mat_reuse = MAT_REUSE_MATRIX; 3943 } else { 3944 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3945 } 3946 if (isbddc || isnn) { 3947 if (pcbddc->coarsening_ratio > 1) { 3948 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3949 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3950 if (pcbddc->dbg_flag) { 3951 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3952 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3953 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3954 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3955 } 3956 } 3957 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3958 } else { 3959 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 3960 coarse_mat = coarse_mat_is; 3961 } 3962 } else { 3963 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3964 } 3965 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3966 3967 /* propagate symmetry info to coarse matrix */ 3968 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 3969 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3970 3971 /* set operators */ 3972 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 3973 if (pcbddc->dbg_flag) { 3974 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3975 } 3976 } else { /* processes non partecipating to coarse solver (if any) */ 3977 coarse_mat = 0; 3978 } 3979 ierr = PetscFree(isarray);CHKERRQ(ierr); 3980 #if 0 3981 { 3982 PetscViewer viewer; 3983 char filename[256]; 3984 sprintf(filename,"coarse_mat.m"); 3985 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 3986 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3987 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 3988 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3989 } 3990 #endif 3991 3992 /* Compute coarse null space (special handling by BDDC only) */ 3993 if (pcbddc->NullSpace) { 3994 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3995 } 3996 3997 if (pcbddc->coarse_ksp) { 3998 Vec crhs,csol; 3999 PetscBool ispreonly; 4000 if (CoarseNullSpace) { 4001 if (isbddc) { 4002 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4003 } else { 4004 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4005 } 4006 } 4007 /* setup coarse ksp */ 4008 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4009 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4010 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4011 /* hack */ 4012 if (!csol) { 4013 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4014 } 4015 if (!crhs) { 4016 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4017 } 4018 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4019 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4020 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4021 KSP check_ksp; 4022 KSPType check_ksp_type; 4023 PC check_pc; 4024 Vec check_vec,coarse_vec; 4025 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4026 PetscInt its; 4027 PetscBool compute_eigs; 4028 PetscReal *eigs_r,*eigs_c; 4029 PetscInt neigs; 4030 const char *prefix; 4031 4032 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4033 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4034 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4035 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4036 if (ispreonly) { 4037 check_ksp_type = KSPPREONLY; 4038 compute_eigs = PETSC_FALSE; 4039 } else { 4040 check_ksp_type = KSPGMRES; 4041 compute_eigs = PETSC_TRUE; 4042 } 4043 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4044 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4045 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4046 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4047 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4048 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4049 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4050 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4051 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4052 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4053 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4054 /* create random vec */ 4055 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4056 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4057 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4058 if (CoarseNullSpace) { 4059 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4060 } 4061 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4062 /* solve coarse problem */ 4063 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4064 if (CoarseNullSpace) { 4065 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4066 } 4067 /* set eigenvalue estimation if preonly has not been requested */ 4068 if (compute_eigs) { 4069 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4070 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4071 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4072 lambda_max = eigs_r[neigs-1]; 4073 lambda_min = eigs_r[0]; 4074 if (pcbddc->use_coarse_estimates) { 4075 if (lambda_max>lambda_min) { 4076 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4077 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4078 } 4079 } 4080 } 4081 4082 /* check coarse problem residual error */ 4083 if (pcbddc->dbg_flag) { 4084 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4085 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4086 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4087 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4088 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4089 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4090 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4091 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4092 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4093 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4094 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4095 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4096 if (compute_eigs) { 4097 PetscReal lambda_max_s,lambda_min_s; 4098 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4099 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4100 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4101 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4102 for (i=0;i<neigs;i++) { 4103 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4104 } 4105 } 4106 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4107 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4108 } 4109 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4110 if (compute_eigs) { 4111 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4112 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4113 } 4114 } 4115 } 4116 /* print additional info */ 4117 if (pcbddc->dbg_flag) { 4118 /* waits until all processes reaches this point */ 4119 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4121 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4122 } 4123 4124 /* free memory */ 4125 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4126 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4127 PetscFunctionReturn(0); 4128 } 4129 4130 #undef __FUNCT__ 4131 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4132 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4133 { 4134 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4135 PC_IS* pcis = (PC_IS*)pc->data; 4136 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4137 PetscInt i,coarse_size; 4138 PetscInt *local_primal_indices; 4139 PetscErrorCode ierr; 4140 4141 PetscFunctionBegin; 4142 /* Compute global number of coarse dofs */ 4143 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4144 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4145 } 4146 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4147 4148 /* check numbering */ 4149 if (pcbddc->dbg_flag) { 4150 PetscScalar coarsesum,*array; 4151 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4152 4153 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4156 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4157 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4158 for (i=0;i<pcbddc->local_primal_size;i++) { 4159 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4160 } 4161 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4162 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4163 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4164 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4165 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4166 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4167 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4168 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4169 for (i=0;i<pcis->n;i++) { 4170 if (array[i] == 1.0) { 4171 set_error = PETSC_TRUE; 4172 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4173 } 4174 } 4175 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4176 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4177 for (i=0;i<pcis->n;i++) { 4178 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4179 } 4180 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4181 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4182 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4183 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4184 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4186 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4187 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4189 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4190 for (i=0;i<pcbddc->local_primal_size;i++) { 4191 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4192 } 4193 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4194 } 4195 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4196 if (set_error_reduced) { 4197 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4198 } 4199 } 4200 /* get back data */ 4201 *coarse_size_n = coarse_size; 4202 *local_primal_indices_n = local_primal_indices; 4203 PetscFunctionReturn(0); 4204 } 4205 4206 #undef __FUNCT__ 4207 #define __FUNCT__ "PCBDDCGlobalToLocal" 4208 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4209 { 4210 IS localis_t; 4211 PetscInt i,lsize,*idxs,n; 4212 PetscScalar *vals; 4213 PetscErrorCode ierr; 4214 4215 PetscFunctionBegin; 4216 /* get indices in local ordering exploiting local to global map */ 4217 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4218 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4219 for (i=0;i<lsize;i++) vals[i] = 1.0; 4220 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4221 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4222 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4223 if (idxs) { /* multilevel guard */ 4224 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4225 } 4226 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4227 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4228 ierr = PetscFree(vals);CHKERRQ(ierr); 4229 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4230 /* now compute set in local ordering */ 4231 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4232 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4233 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4234 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4235 for (i=0,lsize=0;i<n;i++) { 4236 if (PetscRealPart(vals[i]) > 0.5) { 4237 lsize++; 4238 } 4239 } 4240 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4241 for (i=0,lsize=0;i<n;i++) { 4242 if (PetscRealPart(vals[i]) > 0.5) { 4243 idxs[lsize++] = i; 4244 } 4245 } 4246 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4247 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4248 *localis = localis_t; 4249 PetscFunctionReturn(0); 4250 } 4251 4252 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4253 #undef __FUNCT__ 4254 #define __FUNCT__ "PCBDDCMatMult_Private" 4255 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4256 { 4257 PCBDDCChange_ctx change_ctx; 4258 PetscErrorCode ierr; 4259 4260 PetscFunctionBegin; 4261 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4262 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4263 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4264 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4265 PetscFunctionReturn(0); 4266 } 4267 4268 #undef __FUNCT__ 4269 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4270 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4271 { 4272 PCBDDCChange_ctx change_ctx; 4273 PetscErrorCode ierr; 4274 4275 PetscFunctionBegin; 4276 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4277 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4278 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4279 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4280 PetscFunctionReturn(0); 4281 } 4282