1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc1((pcbddc->local_primal_size)*(pcbddc->local_primal_size),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 unsymmetric_check = PETSC_TRUE; 623 } else { /* take references to already computed coarse basis */ 624 unsymmetric_check = PETSC_FALSE; 625 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 626 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 627 if (pcbddc->coarse_phi_D) { 628 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 629 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 630 } 631 } 632 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 633 /* Checking coarse_sub_mat and coarse basis functios */ 634 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 if (pcbddc->dbg_flag) { 637 Mat coarse_sub_mat; 638 Mat AUXMAT,TM1,TM2,TM3,TM4; 639 Mat coarse_phi_D,coarse_phi_B; 640 Mat coarse_psi_D,coarse_psi_B; 641 Mat A_II,A_BB,A_IB,A_BI; 642 MatType checkmattype=MATSEQAIJ; 643 PetscReal real_value; 644 645 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 651 if (unsymmetric_check) { 652 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 653 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 654 } 655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 656 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 660 if (unsymmetric_check) { 661 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 665 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } else { 674 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 675 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 676 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 677 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 679 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 680 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 682 } 683 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 687 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 688 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 694 for (i=0;i<pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 698 for (i=0;i<pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 if (unsymmetric_check) { 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 703 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 705 } 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 707 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 709 } 710 } 711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 722 if (unsymmetric_check) { 723 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 724 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 725 } 726 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 728 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 729 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 730 } 731 /* free memory */ 732 if (n_vertices) { 733 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 738 } 739 if (n_constraints) { 740 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 741 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 742 ierr = MatDestroy(&M1);CHKERRQ(ierr); 743 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 744 } 745 ierr = PetscFree(auxindices);CHKERRQ(ierr); 746 /* get back data */ 747 *coarse_submat_vals_n = coarse_submat_vals; 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 754 { 755 PC_IS* pcis = (PC_IS*)(pc->data); 756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 758 PetscBool issbaij,isseqaij; 759 /* manage repeated solves */ 760 MatReuse reuse; 761 PetscErrorCode ierr; 762 763 PetscFunctionBegin; 764 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 765 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 766 } 767 /* get mat flags */ 768 reuse = MAT_INITIAL_MATRIX; 769 if (pc->setupcalled) { 770 if (pc->flag == SAME_NONZERO_PATTERN) { 771 reuse = MAT_REUSE_MATRIX; 772 } else { 773 reuse = MAT_INITIAL_MATRIX; 774 } 775 } 776 if (reuse == MAT_INITIAL_MATRIX) { 777 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 779 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 780 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 781 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 782 } 783 784 /* transform local matrices if needed */ 785 if (pcbddc->use_change_of_basis) { 786 Mat change_mat_all; 787 PetscScalar *row_cmat_values; 788 PetscInt *row_cmat_indices; 789 PetscInt *nnz,*is_indices,*temp_indices; 790 PetscInt i,j,k,n_D,n_B; 791 792 /* Get Non-overlapping dimensions */ 793 n_B = pcis->n_B; 794 n_D = pcis->n-n_B; 795 796 /* compute nonzero structure of change of basis on all local nodes */ 797 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 798 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 799 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 800 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 801 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 802 k=1; 803 for (i=0;i<n_B;i++) { 804 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 805 nnz[is_indices[i]]=j; 806 if (k < j) k = j; 807 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 808 } 809 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 810 /* assemble change of basis matrix on the whole set of local dofs */ 811 ierr = PetscMalloc1(k,&temp_indices);CHKERRQ(ierr); 812 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 813 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 814 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 815 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 816 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 817 for (i=0;i<n_D;i++) { 818 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 819 } 820 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 821 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 822 for (i=0;i<n_B;i++) { 823 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 824 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 825 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 826 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 827 } 828 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 829 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 830 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 831 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 832 if (isseqaij) { 833 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 834 } else { 835 Mat work_mat; 836 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 837 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 838 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 839 } 840 /* 841 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 842 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 843 */ 844 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 845 ierr = PetscFree(nnz);CHKERRQ(ierr); 846 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 847 } else { 848 /* without change of basis, the local matrix is unchanged */ 849 if (!pcbddc->local_mat) { 850 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 851 pcbddc->local_mat = matis->A; 852 } 853 } 854 855 /* get submatrices */ 856 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 857 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 858 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 859 if (!issbaij) { 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 862 } else { 863 Mat newmat; 864 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 865 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 866 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 867 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 868 } 869 PetscFunctionReturn(0); 870 } 871 872 #undef __FUNCT__ 873 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 874 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 875 { 876 PC_IS* pcis = (PC_IS*)(pc->data); 877 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 878 IS is_aux1,is_aux2; 879 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 880 PetscInt n_vertices,i,j,n_R,n_D,n_B; 881 PetscInt vbs,bs; 882 PetscBT bitmask; 883 PetscErrorCode ierr; 884 885 PetscFunctionBegin; 886 /* 887 No need to setup local scatters if 888 - primal space is unchanged 889 AND 890 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 891 AND 892 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 893 */ 894 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 895 PetscFunctionReturn(0); 896 } 897 /* destroy old objects */ 898 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 899 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 900 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 901 /* Set Non-overlapping dimensions */ 902 n_B = pcis->n_B; n_D = pcis->n - n_B; 903 n_vertices = pcbddc->n_actual_vertices; 904 /* create auxiliary bitmask */ 905 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 906 for (i=0;i<n_vertices;i++) { 907 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 908 } 909 910 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 911 ierr = PetscMalloc1((pcis->n-n_vertices),&idx_R_local);CHKERRQ(ierr); 912 for (i=0, n_R=0; i<pcis->n; i++) { 913 if (!PetscBTLookup(bitmask,i)) { 914 idx_R_local[n_R] = i; 915 n_R++; 916 } 917 } 918 919 /* Block code */ 920 vbs = 1; 921 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 922 if (bs>1 && !(n_vertices%bs)) { 923 PetscBool is_blocked = PETSC_TRUE; 924 PetscInt *vary; 925 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 926 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 927 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 928 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 929 for (i=0; i<n_vertices; i++) { 930 if (vary[i]!=0 && vary[i]!=bs) { 931 is_blocked = PETSC_FALSE; 932 break; 933 } 934 } 935 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 936 vbs = bs; 937 for (i=0;i<n_R/vbs;i++) { 938 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 939 } 940 } 941 ierr = PetscFree(vary);CHKERRQ(ierr); 942 } 943 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 944 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 945 946 /* print some info if requested */ 947 if (pcbddc->dbg_flag) { 948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 950 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 952 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 955 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 956 } 957 958 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 959 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 960 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array1);CHKERRQ(ierr); 961 ierr = PetscMalloc1((pcis->n_B-n_vertices),&aux_array2);CHKERRQ(ierr); 962 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 963 for (i=0; i<n_D; i++) { 964 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 965 } 966 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0, j=0; i<n_R; i++) { 968 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 969 aux_array1[j++] = i; 970 } 971 } 972 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 973 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 974 for (i=0, j=0; i<n_B; i++) { 975 if (!PetscBTLookup(bitmask,is_indices[i])) { 976 aux_array2[j++] = i; 977 } 978 } 979 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 980 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 981 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 982 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 983 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 984 985 if (pcbddc->switch_static || pcbddc->dbg_flag) { 986 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 987 for (i=0, j=0; i<n_R; i++) { 988 if (PetscBTLookup(bitmask,idx_R_local[i])) { 989 aux_array1[j++] = i; 990 } 991 } 992 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 993 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 994 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 995 } 996 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 997 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1004 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1005 { 1006 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1007 PC_IS *pcis = (PC_IS*)pc->data; 1008 PC pc_temp; 1009 Mat A_RR; 1010 MatReuse reuse; 1011 PetscScalar m_one = -1.0; 1012 PetscReal value; 1013 PetscInt n_D,n_R,ibs,mbs; 1014 PetscBool use_exact,use_exact_reduced,issbaij; 1015 PetscErrorCode ierr; 1016 /* prefixes stuff */ 1017 char dir_prefix[256],neu_prefix[256],str_level[16]; 1018 size_t len; 1019 1020 PetscFunctionBegin; 1021 1022 /* compute prefixes */ 1023 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1024 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1025 if (!pcbddc->current_level) { 1026 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1027 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1028 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1029 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1030 } else { 1031 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1032 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1033 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1034 len -= 15; /* remove "pc_bddc_coarse_" */ 1035 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1036 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1037 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1038 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1039 *(dir_prefix+len)='\0'; 1040 *(neu_prefix+len)='\0'; 1041 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1042 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1043 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1044 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1045 } 1046 1047 /* DIRICHLET PROBLEM */ 1048 /* Matrix for Dirichlet problem is pcis->A_II */ 1049 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1050 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1051 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1052 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1053 /* default */ 1054 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1055 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1056 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1057 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1058 if (issbaij) { 1059 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1060 } else { 1061 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1062 } 1063 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1064 /* Allow user's customization */ 1065 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1066 } 1067 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1068 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1069 if (!n_D) { 1070 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1071 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1072 } 1073 /* Set Up KSP for Dirichlet problem of BDDC */ 1074 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1075 /* set ksp_D into pcis data */ 1076 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1077 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1078 pcis->ksp_D = pcbddc->ksp_D; 1079 1080 /* NEUMANN PROBLEM */ 1081 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1082 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1083 if (pcbddc->ksp_R) { /* already created ksp */ 1084 PetscInt nn_R; 1085 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1086 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1087 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1088 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1089 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1090 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1091 reuse = MAT_INITIAL_MATRIX; 1092 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1093 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1094 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1095 reuse = MAT_INITIAL_MATRIX; 1096 } else { /* safe to reuse the matrix */ 1097 reuse = MAT_REUSE_MATRIX; 1098 } 1099 } 1100 /* last check */ 1101 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1102 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1103 reuse = MAT_INITIAL_MATRIX; 1104 } 1105 } else { /* first time, so we need to create the matrix */ 1106 reuse = MAT_INITIAL_MATRIX; 1107 } 1108 /* extract A_RR */ 1109 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1110 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1111 if (ibs != mbs) { 1112 Mat newmat; 1113 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1114 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1115 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1116 } else { 1117 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1118 } 1119 if (!pcbddc->ksp_R) { /* create object if not present */ 1120 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1121 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1122 /* default */ 1123 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1124 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1125 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1126 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1127 if (issbaij) { 1128 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1129 } else { 1130 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1131 } 1132 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1133 /* Allow user's customization */ 1134 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1135 } 1136 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1137 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1138 if (!n_R) { 1139 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1140 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1141 } 1142 /* Set Up KSP for Neumann problem of BDDC */ 1143 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1144 1145 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1146 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1147 /* Dirichlet */ 1148 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1149 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1150 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1151 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1152 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1153 /* need to be adapted? */ 1154 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1155 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1156 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1157 /* print info */ 1158 if (pcbddc->dbg_flag) { 1159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1160 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1162 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1163 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1165 } 1166 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1167 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1168 } 1169 1170 /* Neumann */ 1171 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1172 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1173 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1174 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1175 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1176 /* need to be adapted? */ 1177 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1178 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1179 /* print info */ 1180 if (pcbddc->dbg_flag) { 1181 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1183 } 1184 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1185 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1186 } 1187 } 1188 /* free Neumann problem's matrix */ 1189 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1195 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1196 { 1197 PetscErrorCode ierr; 1198 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1199 1200 PetscFunctionBegin; 1201 if (applytranspose) { 1202 if (pcbddc->local_auxmat1) { 1203 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1204 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1205 } 1206 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1207 } else { 1208 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1209 if (pcbddc->local_auxmat1) { 1210 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1211 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1212 } 1213 } 1214 PetscFunctionReturn(0); 1215 } 1216 1217 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1220 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1221 { 1222 PetscErrorCode ierr; 1223 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1224 PC_IS* pcis = (PC_IS*) (pc->data); 1225 const PetscScalar zero = 0.0; 1226 1227 PetscFunctionBegin; 1228 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1229 if (applytranspose) { 1230 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1231 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1232 } else { 1233 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1234 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1235 } 1236 /* start communications from local primal nodes to rhs of coarse solver */ 1237 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1238 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 1241 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1242 /* TODO remove null space when doing multilevel */ 1243 if (pcbddc->coarse_ksp) { 1244 if (applytranspose) { 1245 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1246 } else { 1247 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1248 } 1249 } 1250 /* start communications from coarse solver solution to local primal nodes */ 1251 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1252 1253 /* Local solution on R nodes */ 1254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1255 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1257 if (pcbddc->switch_static) { 1258 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1259 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1260 } 1261 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1262 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1263 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1265 if (pcbddc->switch_static) { 1266 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268 } 1269 1270 /* complete communications from coarse sol to local primal nodes */ 1271 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1272 1273 /* Sum contributions from two levels */ 1274 if (applytranspose) { 1275 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1276 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1277 } else { 1278 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1279 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1280 } 1281 PetscFunctionReturn(0); 1282 } 1283 1284 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1287 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1288 { 1289 PetscErrorCode ierr; 1290 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1291 PetscScalar *array,*array2; 1292 Vec from,to; 1293 1294 PetscFunctionBegin; 1295 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1296 from = pcbddc->coarse_vec; 1297 to = pcbddc->vec1_P; 1298 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1299 Vec tvec; 1300 PetscInt lsize; 1301 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1302 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1303 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1304 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1305 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1306 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1307 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1308 } 1309 } else { /* from local to global -> put data in coarse right hand side */ 1310 from = pcbddc->vec1_P; 1311 to = pcbddc->coarse_vec; 1312 } 1313 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1319 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1320 { 1321 PetscErrorCode ierr; 1322 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1323 PetscScalar *array,*array2; 1324 Vec from,to; 1325 1326 PetscFunctionBegin; 1327 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1328 from = pcbddc->coarse_vec; 1329 to = pcbddc->vec1_P; 1330 } else { /* from local to global -> put data in coarse right hand side */ 1331 from = pcbddc->vec1_P; 1332 to = pcbddc->coarse_vec; 1333 } 1334 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1335 if (smode == SCATTER_FORWARD) { 1336 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1337 Vec tvec; 1338 PetscInt lsize; 1339 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1340 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1341 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1342 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1343 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1344 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1345 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1346 } 1347 } 1348 PetscFunctionReturn(0); 1349 } 1350 1351 /* uncomment for testing purposes */ 1352 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1355 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1356 { 1357 PetscErrorCode ierr; 1358 PC_IS* pcis = (PC_IS*)(pc->data); 1359 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1360 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1361 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1362 MatType impMatType=MATSEQAIJ; 1363 /* one and zero */ 1364 PetscScalar one=1.0,zero=0.0; 1365 /* space to store constraints and their local indices */ 1366 PetscScalar *temp_quadrature_constraint; 1367 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1368 /* iterators */ 1369 PetscInt i,j,k,total_counts,temp_start_ptr; 1370 /* stuff to store connected components stored in pcbddc->mat_graph */ 1371 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1372 PetscInt n_ISForFaces,n_ISForEdges; 1373 /* near null space stuff */ 1374 MatNullSpace nearnullsp; 1375 const Vec *nearnullvecs; 1376 Vec *localnearnullsp; 1377 PetscBool nnsp_has_cnst; 1378 PetscInt nnsp_size; 1379 PetscScalar *array; 1380 /* BLAS integers */ 1381 PetscBLASInt lwork,lierr; 1382 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1383 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1384 /* LAPACK working arrays for SVD or POD */ 1385 PetscBool skip_lapack; 1386 PetscScalar *work; 1387 PetscReal *singular_vals; 1388 #if defined(PETSC_USE_COMPLEX) 1389 PetscReal *rwork; 1390 #endif 1391 #if defined(PETSC_MISSING_LAPACK_GESVD) 1392 PetscBLASInt Blas_one_2=1; 1393 PetscScalar *temp_basis,*correlation_mat; 1394 #else 1395 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1396 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1397 #endif 1398 /* reuse */ 1399 PetscInt olocal_primal_size; 1400 PetscInt *oprimal_indices_local_idxs; 1401 /* change of basis */ 1402 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1403 PetscBool boolforchange,qr_needed; 1404 PetscBT touched,change_basis,qr_needed_idx; 1405 /* auxiliary stuff */ 1406 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1407 /* some quantities */ 1408 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1409 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1410 1411 1412 PetscFunctionBegin; 1413 /* Destroy Mat objects computed previously */ 1414 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1415 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1416 /* Get index sets for faces, edges and vertices from graph */ 1417 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1418 pcbddc->use_vertices = PETSC_TRUE; 1419 } 1420 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1421 /* HACKS (the two following code branches) */ 1422 if (!ISForVertices && pcbddc->NullSpace) { 1423 pcbddc->use_change_of_basis = PETSC_TRUE; 1424 pcbddc->use_change_on_faces = PETSC_FALSE; 1425 } 1426 if (pcbddc->NullSpace) { 1427 /* use_change_of_basis should be consistent among processors */ 1428 PetscBool tbool = pcbddc->use_change_of_basis; 1429 ierr = MPI_Allreduce(&tbool,&(pcbddc->use_change_of_basis),1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1430 } 1431 /* print some info */ 1432 if (pcbddc->dbg_flag) { 1433 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1434 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1435 i = 0; 1436 if (ISForVertices) { 1437 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1438 } 1439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1442 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1443 } 1444 /* check if near null space is attached to global mat */ 1445 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1446 if (nearnullsp) { 1447 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1448 /* remove any stored info */ 1449 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1450 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1451 /* store information for BDDC solver reuse */ 1452 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1453 pcbddc->onearnullspace = nearnullsp; 1454 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1455 for (i=0;i<nnsp_size;i++) { 1456 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1457 } 1458 } else { /* if near null space is not provided BDDC uses constants by default */ 1459 nnsp_size = 0; 1460 nnsp_has_cnst = PETSC_TRUE; 1461 } 1462 /* get max number of constraints on a single cc */ 1463 max_constraints = nnsp_size; 1464 if (nnsp_has_cnst) max_constraints++; 1465 1466 /* 1467 Evaluate maximum storage size needed by the procedure 1468 - temp_indices will contain start index of each constraint stored as follows 1469 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1470 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1471 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1472 */ 1473 total_counts = n_ISForFaces+n_ISForEdges; 1474 total_counts *= max_constraints; 1475 n_vertices = 0; 1476 if (ISForVertices) { 1477 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1478 } 1479 total_counts += n_vertices; 1480 ierr = PetscMalloc1((total_counts+1),&temp_indices);CHKERRQ(ierr); 1481 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1482 total_counts = 0; 1483 max_size_of_constraint = 0; 1484 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1485 if (i<n_ISForEdges) { 1486 used_IS = &ISForEdges[i]; 1487 } else { 1488 used_IS = &ISForFaces[i-n_ISForEdges]; 1489 } 1490 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1491 total_counts += j; 1492 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1493 } 1494 total_counts *= max_constraints; 1495 total_counts += n_vertices; 1496 ierr = PetscMalloc1(total_counts,&temp_quadrature_constraint);CHKERRQ(ierr); 1497 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint);CHKERRQ(ierr); 1498 ierr = PetscMalloc1(total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1499 /* get local part of global near null space vectors */ 1500 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1501 for (k=0;k<nnsp_size;k++) { 1502 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1503 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 } 1506 1507 /* whether or not to skip lapack calls */ 1508 skip_lapack = PETSC_TRUE; 1509 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1510 1511 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1512 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1513 PetscScalar temp_work; 1514 #if defined(PETSC_MISSING_LAPACK_GESVD) 1515 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1516 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1517 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1518 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1519 #if defined(PETSC_USE_COMPLEX) 1520 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1521 #endif 1522 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1523 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1524 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1525 lwork = -1; 1526 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1527 #if !defined(PETSC_USE_COMPLEX) 1528 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1529 #else 1530 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1531 #endif 1532 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1533 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1534 #else /* on missing GESVD */ 1535 /* SVD */ 1536 PetscInt max_n,min_n; 1537 max_n = max_size_of_constraint; 1538 min_n = max_constraints; 1539 if (max_size_of_constraint < max_constraints) { 1540 min_n = max_size_of_constraint; 1541 max_n = max_constraints; 1542 } 1543 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1544 #if defined(PETSC_USE_COMPLEX) 1545 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1546 #endif 1547 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1548 lwork = -1; 1549 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1550 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1551 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1552 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1553 #if !defined(PETSC_USE_COMPLEX) 1554 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1555 #else 1556 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1557 #endif 1558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1559 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1560 #endif /* on missing GESVD */ 1561 /* Allocate optimal workspace */ 1562 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1563 ierr = PetscMalloc1((PetscInt)lwork,&work);CHKERRQ(ierr); 1564 } 1565 /* Now we can loop on constraining sets */ 1566 total_counts = 0; 1567 temp_indices[0] = 0; 1568 /* vertices */ 1569 if (ISForVertices) { 1570 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1571 if (nnsp_has_cnst) { /* consider all vertices */ 1572 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1573 for (i=0;i<n_vertices;i++) { 1574 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1575 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1576 total_counts++; 1577 } 1578 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1579 PetscBool used_vertex; 1580 for (i=0;i<n_vertices;i++) { 1581 used_vertex = PETSC_FALSE; 1582 k = 0; 1583 while (!used_vertex && k<nnsp_size) { 1584 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1585 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1586 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1587 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1588 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1589 total_counts++; 1590 used_vertex = PETSC_TRUE; 1591 } 1592 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1593 k++; 1594 } 1595 } 1596 } 1597 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1598 n_vertices = total_counts; 1599 } 1600 1601 /* edges and faces */ 1602 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1603 if (i<n_ISForEdges) { 1604 used_IS = &ISForEdges[i]; 1605 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1606 } else { 1607 used_IS = &ISForFaces[i-n_ISForEdges]; 1608 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1609 } 1610 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1611 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1612 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1613 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1614 /* change of basis should not be performed on local periodic nodes */ 1615 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1616 if (nnsp_has_cnst) { 1617 PetscScalar quad_value; 1618 temp_constraints++; 1619 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1620 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1621 for (j=0;j<size_of_constraint;j++) { 1622 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1623 } 1624 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1625 total_counts++; 1626 } 1627 for (k=0;k<nnsp_size;k++) { 1628 PetscReal real_value; 1629 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1630 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1631 for (j=0;j<size_of_constraint;j++) { 1632 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1633 } 1634 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1635 /* check if array is null on the connected component */ 1636 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1637 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1638 if (real_value > 0.0) { /* keep indices and values */ 1639 temp_constraints++; 1640 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1641 total_counts++; 1642 } 1643 } 1644 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1645 valid_constraints = temp_constraints; 1646 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1647 if (!pcbddc->use_nnsp_true && temp_constraints) { 1648 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1649 1650 #if defined(PETSC_MISSING_LAPACK_GESVD) 1651 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1652 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1653 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1654 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1655 from that computed using LAPACKgesvd 1656 -> This is due to a different computation of eigenvectors in LAPACKheev 1657 -> The quality of the POD-computed basis will be the same */ 1658 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1659 /* Store upper triangular part of correlation matrix */ 1660 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1661 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1662 for (j=0;j<temp_constraints;j++) { 1663 for (k=0;k<j+1;k++) { 1664 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1665 } 1666 } 1667 /* compute eigenvalues and eigenvectors of correlation matrix */ 1668 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1669 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1670 #if !defined(PETSC_USE_COMPLEX) 1671 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1672 #else 1673 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1674 #endif 1675 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1676 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1677 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1678 j=0; 1679 while (j < temp_constraints && singular_vals[j] < tol) j++; 1680 total_counts=total_counts-j; 1681 valid_constraints = temp_constraints-j; 1682 /* scale and copy POD basis into used quadrature memory */ 1683 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1684 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1689 if (j<temp_constraints) { 1690 PetscInt ii; 1691 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1692 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1693 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1694 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1695 for (k=0;k<temp_constraints-j;k++) { 1696 for (ii=0;ii<size_of_constraint;ii++) { 1697 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1698 } 1699 } 1700 } 1701 #else /* on missing GESVD */ 1702 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1703 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1705 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1706 #if !defined(PETSC_USE_COMPLEX) 1707 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1708 #else 1709 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1710 #endif 1711 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1712 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1713 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1714 k = temp_constraints; 1715 if (k > size_of_constraint) k = size_of_constraint; 1716 j = 0; 1717 while (j < k && singular_vals[k-j-1] < tol) j++; 1718 total_counts = total_counts-temp_constraints+k-j; 1719 valid_constraints = k-j; 1720 #endif /* on missing GESVD */ 1721 } 1722 /* setting change_of_basis flag is safe now */ 1723 if (boolforchange) { 1724 for (j=0;j<valid_constraints;j++) { 1725 PetscBTSet(change_basis,total_counts-j-1); 1726 } 1727 } 1728 } 1729 /* free index sets of faces, edges and vertices */ 1730 for (i=0;i<n_ISForFaces;i++) { 1731 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1732 } 1733 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1734 for (i=0;i<n_ISForEdges;i++) { 1735 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1736 } 1737 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1738 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1739 /* map temp_indices_to_constraint in boundary numbering */ 1740 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1741 if (i != temp_indices[total_counts]) { 1742 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1743 } 1744 1745 /* free workspace */ 1746 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1747 ierr = PetscFree(work);CHKERRQ(ierr); 1748 #if defined(PETSC_USE_COMPLEX) 1749 ierr = PetscFree(rwork);CHKERRQ(ierr); 1750 #endif 1751 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1752 #if defined(PETSC_MISSING_LAPACK_GESVD) 1753 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1754 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1755 #endif 1756 } 1757 for (k=0;k<nnsp_size;k++) { 1758 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1759 } 1760 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1761 1762 /* set quantities in pcbddc data structure and store previous primal size */ 1763 /* n_vertices defines the number of subdomain corners in the primal space */ 1764 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1765 olocal_primal_size = pcbddc->local_primal_size; 1766 pcbddc->local_primal_size = total_counts; 1767 pcbddc->n_vertices = n_vertices; 1768 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1769 1770 /* Create constraint matrix */ 1771 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1772 /* so we need to set it up properly either with or without change of basis */ 1773 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1774 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1775 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1776 /* array to compute a local numbering of constraints : vertices first then constraints */ 1777 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 1778 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1779 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1780 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 1781 /* auxiliary stuff for basis change */ 1782 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 1783 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1784 1785 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1786 total_primal_vertices=0; 1787 for (i=0;i<pcbddc->local_primal_size;i++) { 1788 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1789 if (size_of_constraint == 1) { 1790 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1791 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1792 aux_primal_minloc[total_primal_vertices]=0; 1793 total_primal_vertices++; 1794 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1795 PetscInt min_loc,min_index; 1796 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1797 /* find first untouched local node */ 1798 k = 0; 1799 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1800 min_index = global_indices[k]; 1801 min_loc = k; 1802 /* search the minimum among global nodes already untouched on the cc */ 1803 for (k=1;k<size_of_constraint;k++) { 1804 /* there can be more than one constraint on a single connected component */ 1805 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1806 min_index = global_indices[k]; 1807 min_loc = k; 1808 } 1809 } 1810 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1811 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1812 aux_primal_minloc[total_primal_vertices]=min_loc; 1813 total_primal_vertices++; 1814 } 1815 } 1816 /* determine if a QR strategy is needed for change of basis */ 1817 qr_needed = PETSC_FALSE; 1818 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1819 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1820 if (PetscBTLookup(change_basis,i)) { 1821 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1822 j = 0; 1823 for (k=0;k<size_of_constraint;k++) { 1824 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1825 j++; 1826 } 1827 } 1828 /* found more than one primal dof on the cc */ 1829 if (j > 1) { 1830 PetscBTSet(qr_needed_idx,i); 1831 qr_needed = PETSC_TRUE; 1832 } 1833 } 1834 } 1835 /* free workspace */ 1836 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1837 1838 /* permute indices in order to have a sorted set of vertices */ 1839 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1840 1841 /* nonzero structure of constraint matrix */ 1842 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 1843 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1844 j=total_primal_vertices; 1845 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1846 if (!PetscBTLookup(change_basis,i)) { 1847 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1848 j++; 1849 } 1850 } 1851 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1852 ierr = PetscFree(nnz);CHKERRQ(ierr); 1853 /* set values in constraint matrix */ 1854 for (i=0;i<total_primal_vertices;i++) { 1855 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1856 } 1857 total_counts = total_primal_vertices; 1858 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1859 if (!PetscBTLookup(change_basis,i)) { 1860 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1861 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1862 total_counts++; 1863 } 1864 } 1865 /* assembling */ 1866 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1867 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1868 /* 1869 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1870 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1871 */ 1872 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1873 if (pcbddc->use_change_of_basis) { 1874 /* dual and primal dofs on a single cc */ 1875 PetscInt dual_dofs,primal_dofs; 1876 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1877 PetscInt primal_counter; 1878 /* working stuff for GEQRF */ 1879 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 1880 PetscBLASInt lqr_work; 1881 /* working stuff for UNGQR */ 1882 PetscScalar *gqr_work,lgqr_work_t; 1883 PetscBLASInt lgqr_work; 1884 /* working stuff for TRTRS */ 1885 PetscScalar *trs_rhs; 1886 PetscBLASInt Blas_NRHS; 1887 /* pointers for values insertion into change of basis matrix */ 1888 PetscInt *start_rows,*start_cols; 1889 PetscScalar *start_vals; 1890 /* working stuff for values insertion */ 1891 PetscBT is_primal; 1892 1893 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1894 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1895 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1896 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1897 /* work arrays */ 1898 ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr); 1899 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1900 /* nonzeros per row */ 1901 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1902 if (PetscBTLookup(change_basis,i)) { 1903 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1904 if (PetscBTLookup(qr_needed_idx,i)) { 1905 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1906 } else { 1907 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1908 /* get local primal index on the cc */ 1909 j = 0; 1910 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1911 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1912 } 1913 } 1914 } 1915 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1916 ierr = PetscFree(nnz);CHKERRQ(ierr); 1917 /* Set initial identity in the matrix */ 1918 for (i=0;i<pcis->n_B;i++) { 1919 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1920 } 1921 1922 if (pcbddc->dbg_flag) { 1923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1925 } 1926 1927 1928 /* Now we loop on the constraints which need a change of basis */ 1929 /* 1930 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1931 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1932 1933 Basic blocks of change of basis matrix T computed 1934 1935 - Using the following block transformation if there is only a primal dof on the cc 1936 (in the example, primal dof is the last one of the edge in LOCAL ordering 1937 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1938 | 1 0 ... 0 1 | 1939 | 0 1 ... 0 1 | 1940 | ... | 1941 | 0 ... 1 1 | 1942 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1943 1944 - via QR decomposition of constraints otherwise 1945 */ 1946 if (qr_needed) { 1947 /* space to store Q */ 1948 ierr = PetscMalloc1((max_size_of_constraint)*(max_size_of_constraint),&qr_basis);CHKERRQ(ierr); 1949 /* first we issue queries for optimal work */ 1950 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1951 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1953 lqr_work = -1; 1954 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1955 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1956 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1957 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 1958 lgqr_work = -1; 1959 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1960 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1963 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1964 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1965 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1966 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1967 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 1968 /* array to store scaling factors for reflectors */ 1969 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 1970 /* array to store rhs and solution of triangular solver */ 1971 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 1972 /* allocating workspace for check */ 1973 if (pcbddc->dbg_flag) { 1974 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr); 1975 } 1976 } 1977 /* array to store whether a node is primal or not */ 1978 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1979 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 1980 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1981 if (i != total_primal_vertices) { 1982 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1983 } 1984 for (i=0;i<total_primal_vertices;i++) { 1985 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1986 } 1987 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1988 1989 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1990 /* -> using implicit ordering contained in temp_indices data */ 1991 total_counts = pcbddc->n_vertices; 1992 primal_counter = total_counts; 1993 while (total_counts<pcbddc->local_primal_size) { 1994 primal_dofs = 1; 1995 if (PetscBTLookup(change_basis,total_counts)) { 1996 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1997 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1998 primal_dofs++; 1999 } 2000 /* get constraint info */ 2001 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2002 dual_dofs = size_of_constraint-primal_dofs; 2003 2004 if (pcbddc->dbg_flag) { 2005 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2006 } 2007 2008 if (primal_dofs > 1) { /* QR */ 2009 2010 /* copy quadrature constraints for change of basis check */ 2011 if (pcbddc->dbg_flag) { 2012 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2013 } 2014 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2015 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2016 2017 /* compute QR decomposition of constraints */ 2018 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2019 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2021 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2022 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2023 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2024 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2025 2026 /* explictly compute R^-T */ 2027 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2028 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2029 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2030 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2032 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2033 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2034 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2035 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2036 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2037 2038 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2039 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2042 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2043 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2044 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2045 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2046 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2047 2048 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2049 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2050 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2052 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2057 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2058 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2059 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2060 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2061 2062 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2063 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2064 /* insert cols for primal dofs */ 2065 for (j=0;j<primal_dofs;j++) { 2066 start_vals = &qr_basis[j*size_of_constraint]; 2067 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2068 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2069 } 2070 /* insert cols for dual dofs */ 2071 for (j=0,k=0;j<dual_dofs;k++) { 2072 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2073 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2074 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2075 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2076 j++; 2077 } 2078 } 2079 2080 /* check change of basis */ 2081 if (pcbddc->dbg_flag) { 2082 PetscInt ii,jj; 2083 PetscBool valid_qr=PETSC_TRUE; 2084 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2085 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2090 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2091 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2092 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2093 for (jj=0;jj<size_of_constraint;jj++) { 2094 for (ii=0;ii<primal_dofs;ii++) { 2095 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2096 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2097 } 2098 } 2099 if (!valid_qr) { 2100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2101 for (jj=0;jj<size_of_constraint;jj++) { 2102 for (ii=0;ii<primal_dofs;ii++) { 2103 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2104 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2105 } 2106 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2107 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2108 } 2109 } 2110 } 2111 } else { 2112 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2113 } 2114 } 2115 } else { /* simple transformation block */ 2116 PetscInt row,col; 2117 PetscScalar val; 2118 for (j=0;j<size_of_constraint;j++) { 2119 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2120 if (!PetscBTLookup(is_primal,row)) { 2121 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2122 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2123 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2124 } else { 2125 for (k=0;k<size_of_constraint;k++) { 2126 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2127 if (row != col) { 2128 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2129 } else { 2130 val = 1.0; 2131 } 2132 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2133 } 2134 } 2135 } 2136 if (pcbddc->dbg_flag) { 2137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2138 } 2139 } 2140 /* increment primal counter */ 2141 primal_counter += primal_dofs; 2142 } else { 2143 if (pcbddc->dbg_flag) { 2144 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2145 } 2146 } 2147 /* increment constraint counter total_counts */ 2148 total_counts += primal_dofs; 2149 } 2150 2151 /* free workspace */ 2152 if (qr_needed) { 2153 if (pcbddc->dbg_flag) { 2154 ierr = PetscFree(work);CHKERRQ(ierr); 2155 } 2156 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2157 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2158 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2159 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2160 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2161 } 2162 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2163 /* assembling */ 2164 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 /* 2167 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2168 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2169 */ 2170 } 2171 2172 /* get indices in local ordering for vertices and constraints */ 2173 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2174 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2175 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2176 } 2177 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2178 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2179 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2180 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2181 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2182 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2183 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2184 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2185 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2186 /* set quantities in PCBDDC data struct */ 2187 pcbddc->n_actual_vertices = i; 2188 /* check if a new primal space has been introduced */ 2189 pcbddc->new_primal_space_local = PETSC_TRUE; 2190 if (olocal_primal_size == pcbddc->local_primal_size) { 2191 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2192 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2193 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2194 } 2195 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2196 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2197 2198 /* flush dbg viewer */ 2199 if (pcbddc->dbg_flag) { 2200 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2201 } 2202 2203 /* free workspace */ 2204 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2205 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2206 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2207 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2208 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2209 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2210 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2211 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 #undef __FUNCT__ 2216 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2217 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2218 { 2219 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2220 PC_IS *pcis = (PC_IS*)pc->data; 2221 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2222 PetscInt ierr,i,vertex_size; 2223 PetscViewer viewer=pcbddc->dbg_viewer; 2224 2225 PetscFunctionBegin; 2226 /* Reset previously computed graph */ 2227 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2228 /* Init local Graph struct */ 2229 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2230 2231 /* Check validity of the csr graph passed in by the user */ 2232 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2233 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2234 } 2235 2236 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2237 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2238 Mat mat_adj; 2239 const PetscInt *xadj,*adjncy; 2240 PetscBool flg_row=PETSC_TRUE; 2241 2242 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2243 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2244 if (!flg_row) { 2245 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2246 } 2247 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2248 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2249 if (!flg_row) { 2250 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2251 } 2252 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2253 } 2254 2255 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2256 vertex_size = 1; 2257 if (pcbddc->user_provided_isfordofs) { 2258 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2259 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2260 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2261 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2262 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2263 } 2264 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2265 pcbddc->n_ISForDofs = 0; 2266 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2267 } 2268 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2269 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2270 } else { 2271 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2272 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2273 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2274 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2275 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2276 } 2277 } 2278 } 2279 2280 /* Setup of Graph */ 2281 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2282 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2283 } 2284 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2285 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2286 } 2287 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2288 2289 /* Graph's connected components analysis */ 2290 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2291 2292 /* print some info to stdout */ 2293 if (pcbddc->dbg_flag) { 2294 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2295 } 2296 2297 /* mark topography has done */ 2298 pcbddc->recompute_topography = PETSC_FALSE; 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2304 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2305 { 2306 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2307 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 n = 0; 2312 vertices = 0; 2313 if (pcbddc->ConstraintMatrix) { 2314 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2315 for (i=0;i<local_primal_size;i++) { 2316 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2317 if (size_of_constraint == 1) n++; 2318 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2319 } 2320 if (vertices_idx) { 2321 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 2322 n = 0; 2323 for (i=0;i<local_primal_size;i++) { 2324 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2325 if (size_of_constraint == 1) { 2326 vertices[n++]=row_cmat_indices[0]; 2327 } 2328 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2329 } 2330 } 2331 } 2332 *n_vertices = n; 2333 if (vertices_idx) *vertices_idx = vertices; 2334 PetscFunctionReturn(0); 2335 } 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2339 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2340 { 2341 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2342 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2343 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2344 PetscBT touched; 2345 PetscErrorCode ierr; 2346 2347 /* This function assumes that the number of local constraints per connected component 2348 is not greater than the number of nodes defined for the connected component 2349 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2350 PetscFunctionBegin; 2351 n = 0; 2352 constraints_index = 0; 2353 if (pcbddc->ConstraintMatrix) { 2354 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2355 max_size_of_constraint = 0; 2356 for (i=0;i<local_primal_size;i++) { 2357 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2358 if (size_of_constraint > 1) { 2359 n++; 2360 } 2361 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2362 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2363 } 2364 if (constraints_idx) { 2365 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 2366 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 2367 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2368 n = 0; 2369 for (i=0;i<local_primal_size;i++) { 2370 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2371 if (size_of_constraint > 1) { 2372 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2373 /* find first untouched local node */ 2374 j = 0; 2375 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2376 min_index = row_cmat_global_indices[j]; 2377 min_loc = j; 2378 /* search the minimum among nodes not yet touched on the connected component 2379 since there can be more than one constraint on a single cc */ 2380 for (j=1;j<size_of_constraint;j++) { 2381 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2382 min_index = row_cmat_global_indices[j]; 2383 min_loc = j; 2384 } 2385 } 2386 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2387 constraints_index[n++] = row_cmat_indices[min_loc]; 2388 } 2389 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2390 } 2391 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2392 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2393 } 2394 } 2395 *n_constraints = n; 2396 if (constraints_idx) *constraints_idx = constraints_index; 2397 PetscFunctionReturn(0); 2398 } 2399 2400 /* the next two functions has been adapted from pcis.c */ 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "PCBDDCApplySchur" 2403 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2404 { 2405 PetscErrorCode ierr; 2406 PC_IS *pcis = (PC_IS*)(pc->data); 2407 2408 PetscFunctionBegin; 2409 if (!vec2_B) { vec2_B = v; } 2410 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2411 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2412 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2413 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2414 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 #undef __FUNCT__ 2419 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2420 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2421 { 2422 PetscErrorCode ierr; 2423 PC_IS *pcis = (PC_IS*)(pc->data); 2424 2425 PetscFunctionBegin; 2426 if (!vec2_B) { vec2_B = v; } 2427 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2428 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2429 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2430 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2431 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "PCBDDCSubsetNumbering" 2437 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2438 { 2439 Vec local_vec,global_vec; 2440 IS seqis,paris; 2441 VecScatter scatter_ctx; 2442 PetscScalar *array; 2443 PetscInt *temp_global_dofs; 2444 PetscScalar globalsum; 2445 PetscInt i,j,s; 2446 PetscInt nlocals,first_index,old_index,max_local; 2447 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2448 PetscMPIInt *dof_sizes,*dof_displs; 2449 PetscBool first_found; 2450 PetscErrorCode ierr; 2451 2452 PetscFunctionBegin; 2453 /* mpi buffers */ 2454 MPI_Comm_size(comm,&size_prec_comm); 2455 MPI_Comm_rank(comm,&rank_prec_comm); 2456 j = ( !rank_prec_comm ? size_prec_comm : 0); 2457 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 2458 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 2459 /* get maximum size of subset */ 2460 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 2461 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2462 max_local = 0; 2463 if (n_local_dofs) { 2464 max_local = temp_global_dofs[0]; 2465 for (i=1;i<n_local_dofs;i++) { 2466 if (max_local < temp_global_dofs[i] ) { 2467 max_local = temp_global_dofs[i]; 2468 } 2469 } 2470 } 2471 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2472 max_global++; 2473 max_local = 0; 2474 if (n_local_dofs) { 2475 max_local = local_dofs[0]; 2476 for (i=1;i<n_local_dofs;i++) { 2477 if (max_local < local_dofs[i] ) { 2478 max_local = local_dofs[i]; 2479 } 2480 } 2481 } 2482 max_local++; 2483 /* allocate workspace */ 2484 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2485 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2486 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2487 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2488 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2489 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2490 /* create scatter */ 2491 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2492 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2493 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2494 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2495 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2496 /* init array */ 2497 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2498 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2499 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2500 if (local_dofs_mult) { 2501 for (i=0;i<n_local_dofs;i++) { 2502 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2503 } 2504 } else { 2505 for (i=0;i<n_local_dofs;i++) { 2506 array[local_dofs[i]]=1.0; 2507 } 2508 } 2509 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2510 /* scatter into global vec and get total number of global dofs */ 2511 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2512 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2513 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2514 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2515 /* Fill global_vec with cumulative function for global numbering */ 2516 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2517 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2518 nlocals = 0; 2519 first_index = -1; 2520 first_found = PETSC_FALSE; 2521 for (i=0;i<s;i++) { 2522 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2523 first_found = PETSC_TRUE; 2524 first_index = i; 2525 } 2526 nlocals += (PetscInt)PetscRealPart(array[i]); 2527 } 2528 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2529 if (!rank_prec_comm) { 2530 dof_displs[0]=0; 2531 for (i=1;i<size_prec_comm;i++) { 2532 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2533 } 2534 } 2535 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2536 if (first_found) { 2537 array[first_index] += (PetscScalar)nlocals; 2538 old_index = first_index; 2539 for (i=first_index+1;i<s;i++) { 2540 if (PetscRealPart(array[i]) > 0.0) { 2541 array[i] += array[old_index]; 2542 old_index = i; 2543 } 2544 } 2545 } 2546 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2547 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2548 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2549 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2550 /* get global ordering of local dofs */ 2551 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2552 if (local_dofs_mult) { 2553 for (i=0;i<n_local_dofs;i++) { 2554 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2555 } 2556 } else { 2557 for (i=0;i<n_local_dofs;i++) { 2558 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2559 } 2560 } 2561 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2562 /* free workspace */ 2563 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2564 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2565 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2566 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2567 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2568 /* return pointer to global ordering of local dofs */ 2569 *global_numbering_subset = temp_global_dofs; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 #undef __FUNCT__ 2574 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2575 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2576 { 2577 PetscInt i,j; 2578 PetscScalar *alphas; 2579 PetscErrorCode ierr; 2580 2581 PetscFunctionBegin; 2582 /* this implements stabilized Gram-Schmidt */ 2583 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 2584 for (i=0;i<n;i++) { 2585 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2586 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2587 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2588 } 2589 ierr = PetscFree(alphas);CHKERRQ(ierr); 2590 PetscFunctionReturn(0); 2591 } 2592 2593 /* TODO 2594 - now preallocation is done assuming SEQDENSE local matrices 2595 */ 2596 #undef __FUNCT__ 2597 #define __FUNCT__ "MatISGetMPIXAIJ" 2598 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2599 { 2600 Mat new_mat; 2601 Mat_IS *matis = (Mat_IS*)(mat->data); 2602 /* info on mat */ 2603 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2604 PetscInt bs,rows,cols; 2605 PetscInt lrows,lcols; 2606 PetscInt local_rows,local_cols; 2607 PetscBool isdense; 2608 /* values insertion */ 2609 PetscScalar *array; 2610 PetscInt *local_indices,*global_indices; 2611 /* work */ 2612 PetscInt i,j,index_row; 2613 PetscErrorCode ierr; 2614 2615 PetscFunctionBegin; 2616 /* MISSING CHECKS 2617 - rectangular case not covered (it is not allowed by MATIS) 2618 */ 2619 /* get info from mat */ 2620 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2621 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2622 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2623 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2624 2625 /* work */ 2626 ierr = PetscMalloc1(local_rows,&local_indices);CHKERRQ(ierr); 2627 for (i=0;i<local_rows;i++) local_indices[i]=i; 2628 /* map indices of local mat to global values */ 2629 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2630 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2631 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2632 2633 if (reuse==MAT_INITIAL_MATRIX) { 2634 Vec vec_dnz,vec_onz; 2635 PetscScalar *my_dnz,*my_onz; 2636 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2637 PetscInt index_col,owner; 2638 PetscMPIInt nsubdomains; 2639 2640 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2641 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2642 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2643 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2644 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2645 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2646 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2647 2648 /* 2649 preallocation 2650 */ 2651 2652 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2653 /* 2654 Some vectors are needed to sum up properly on shared interface dofs. 2655 Preallocation macros cannot do the job. 2656 Note that preallocation is not exact, since it overestimates nonzeros 2657 */ 2658 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2659 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2660 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2661 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2662 /* All processes need to compute entire row ownership */ 2663 ierr = PetscMalloc1(rows,&row_ownership);CHKERRQ(ierr); 2664 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2665 for (i=0;i<nsubdomains;i++) { 2666 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2667 row_ownership[j]=i; 2668 } 2669 } 2670 2671 /* 2672 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2673 then, they will be summed up properly. This way, preallocation is always sufficient 2674 */ 2675 ierr = PetscMalloc1(local_rows,&my_dnz);CHKERRQ(ierr); 2676 ierr = PetscMalloc1(local_rows,&my_onz);CHKERRQ(ierr); 2677 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2678 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2679 for (i=0;i<local_rows;i++) { 2680 index_row = global_indices[i]; 2681 for (j=i;j<local_rows;j++) { 2682 owner = row_ownership[index_row]; 2683 index_col = global_indices[j]; 2684 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2685 my_dnz[i] += 1.0; 2686 } else { /* offdiag block */ 2687 my_onz[i] += 1.0; 2688 } 2689 /* same as before, interchanging rows and cols */ 2690 if (i != j) { 2691 owner = row_ownership[index_col]; 2692 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2693 my_dnz[j] += 1.0; 2694 } else { 2695 my_onz[j] += 1.0; 2696 } 2697 } 2698 } 2699 } 2700 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2701 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2702 if (local_rows) { /* multilevel guard */ 2703 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2704 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2705 } 2706 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2707 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2708 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2709 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2710 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2711 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2712 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2713 2714 /* set computed preallocation in dnz and onz */ 2715 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2716 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2717 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2718 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2719 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2720 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2721 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2722 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2723 2724 /* Resize preallocation if overestimated */ 2725 for (i=0;i<lrows;i++) { 2726 dnz[i] = PetscMin(dnz[i],lcols); 2727 onz[i] = PetscMin(onz[i],cols-lcols); 2728 } 2729 /* set preallocation */ 2730 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2731 for (i=0;i<lrows/bs;i++) { 2732 dnz[i] = dnz[i*bs]/bs; 2733 onz[i] = onz[i*bs]/bs; 2734 } 2735 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2736 for (i=0;i<lrows/bs;i++) { 2737 dnz[i] = dnz[i]-i; 2738 } 2739 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2740 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2741 *M = new_mat; 2742 } else { 2743 PetscInt mbs,mrows,mcols; 2744 /* some checks */ 2745 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2746 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2747 if (mrows != rows) { 2748 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2749 } 2750 if (mrows != rows) { 2751 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2752 } 2753 if (mbs != bs) { 2754 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2755 } 2756 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2757 } 2758 /* set local to global mappings */ 2759 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2760 /* Set values */ 2761 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2762 if (isdense) { /* special case for dense local matrices */ 2763 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2764 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2765 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2766 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2767 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2768 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2769 } else { /* very basic values insertion for all other matrix types */ 2770 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2771 for (i=0;i<local_rows;i++) { 2772 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2773 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2774 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2776 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2777 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2778 } 2779 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2780 } 2781 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 if (isdense) { 2784 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2785 } 2786 PetscFunctionReturn(0); 2787 } 2788 2789 #undef __FUNCT__ 2790 #define __FUNCT__ "MatISGetSubassemblingPattern" 2791 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2792 { 2793 Mat subdomain_adj; 2794 IS new_ranks,ranks_send_to; 2795 MatPartitioning partitioner; 2796 Mat_IS *matis; 2797 PetscInt n_neighs,*neighs,*n_shared,**shared; 2798 PetscInt prank; 2799 PetscMPIInt size,rank,color; 2800 PetscInt *xadj,*adjncy,*oldranks; 2801 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2802 PetscInt i,j,local_size,threshold=0; 2803 PetscErrorCode ierr; 2804 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2805 PetscSubcomm subcomm; 2806 2807 PetscFunctionBegin; 2808 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2809 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2810 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2811 2812 /* Get info on mapping */ 2813 matis = (Mat_IS*)(mat->data); 2814 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2816 2817 /* build local CSR graph of subdomains' connectivity */ 2818 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 2819 xadj[0] = 0; 2820 xadj[1] = PetscMax(n_neighs-1,0); 2821 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 2822 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 2823 2824 if (threshold) { 2825 PetscInt* count,min_threshold; 2826 ierr = PetscMalloc1(local_size,&count);CHKERRQ(ierr); 2827 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2828 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2829 for (j=0;j<n_shared[i];j++) { 2830 count[shared[i][j]] += 1; 2831 } 2832 } 2833 /* adapt threshold since we dont want to lose significant connections */ 2834 min_threshold = n_neighs; 2835 for (i=1;i<n_neighs;i++) { 2836 for (j=0;j<n_shared[i];j++) { 2837 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2838 } 2839 } 2840 threshold = PetscMax(min_threshold+1,threshold); 2841 xadj[1] = 0; 2842 for (i=1;i<n_neighs;i++) { 2843 for (j=0;j<n_shared[i];j++) { 2844 if (count[shared[i][j]] < threshold) { 2845 adjncy[xadj[1]] = neighs[i]; 2846 adjncy_wgt[xadj[1]] = n_shared[i]; 2847 xadj[1]++; 2848 break; 2849 } 2850 } 2851 } 2852 ierr = PetscFree(count);CHKERRQ(ierr); 2853 } else { 2854 if (xadj[1]) { 2855 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2856 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2857 } 2858 } 2859 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2860 if (use_square) { 2861 for (i=0;i<xadj[1];i++) { 2862 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2863 } 2864 } 2865 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2866 2867 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2868 2869 /* 2870 Restrict work on active processes only. 2871 */ 2872 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2873 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2874 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2875 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2876 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2877 if (color) { 2878 ierr = PetscFree(xadj);CHKERRQ(ierr); 2879 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2880 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2881 } else { 2882 PetscInt coarsening_ratio; 2883 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2884 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 2885 prank = rank; 2886 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2887 /* 2888 for (i=0;i<size;i++) { 2889 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2890 } 2891 */ 2892 for (i=0;i<xadj[1];i++) { 2893 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2894 } 2895 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2896 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2897 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2898 2899 /* Partition */ 2900 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2901 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2902 if (use_vwgt) { 2903 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2904 v_wgt[0] = local_size; 2905 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2906 } 2907 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2908 coarsening_ratio = size/n_subdomains; 2909 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2910 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2911 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2912 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2913 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2914 2915 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2916 if (contiguous) { 2917 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2918 } else { 2919 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2920 } 2921 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2922 /* clean up */ 2923 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2924 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2925 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2926 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2927 } 2928 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2929 2930 /* assemble parallel IS for sends */ 2931 i = 1; 2932 if (color) i=0; 2933 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2934 2935 /* get back IS */ 2936 *is_sends = ranks_send_to; 2937 PetscFunctionReturn(0); 2938 } 2939 2940 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2941 2942 #undef __FUNCT__ 2943 #define __FUNCT__ "MatISSubassemble" 2944 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2945 { 2946 Mat local_mat; 2947 Mat_IS *matis; 2948 IS is_sends_internal; 2949 PetscInt rows,cols; 2950 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2951 PetscBool ismatis,isdense,destroy_mat; 2952 ISLocalToGlobalMapping l2gmap; 2953 PetscInt* l2gmap_indices; 2954 const PetscInt* is_indices; 2955 MatType new_local_type; 2956 /* buffers */ 2957 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2958 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2959 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2960 /* MPI */ 2961 MPI_Comm comm,comm_n; 2962 PetscSubcomm subcomm; 2963 PetscMPIInt n_sends,n_recvs,commsize; 2964 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2965 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2966 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2967 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2968 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2969 PetscErrorCode ierr; 2970 2971 PetscFunctionBegin; 2972 /* TODO: add missing checks */ 2973 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2974 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2975 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2976 PetscValidLogicalCollectiveInt(mat,nis,7); 2977 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2978 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2979 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2980 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2981 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2982 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2983 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2984 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2985 PetscInt mrows,mcols,mnrows,mncols; 2986 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2987 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2988 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2989 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2990 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2991 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2992 } 2993 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2994 PetscValidLogicalCollectiveInt(mat,bs,0); 2995 /* prepare IS for sending if not provided */ 2996 if (!is_sends) { 2997 PetscBool pcontig = PETSC_TRUE; 2998 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2999 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3000 } else { 3001 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3002 is_sends_internal = is_sends; 3003 } 3004 3005 /* get pointer of MATIS data */ 3006 matis = (Mat_IS*)mat->data; 3007 3008 /* get comm */ 3009 comm = PetscObjectComm((PetscObject)mat); 3010 3011 /* compute number of sends */ 3012 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3013 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3014 3015 /* compute number of receives */ 3016 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3017 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3018 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3019 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3020 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3021 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3022 ierr = PetscFree(iflags);CHKERRQ(ierr); 3023 3024 /* restrict comm if requested */ 3025 subcomm = 0; 3026 destroy_mat = PETSC_FALSE; 3027 if (restrict_comm) { 3028 PetscMPIInt color,rank,subcommsize; 3029 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3030 color = 0; 3031 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3032 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3033 subcommsize = commsize - subcommsize; 3034 /* check if reuse has been requested */ 3035 if (reuse == MAT_REUSE_MATRIX) { 3036 if (*mat_n) { 3037 PetscMPIInt subcommsize2; 3038 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3039 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3040 comm_n = PetscObjectComm((PetscObject)*mat_n); 3041 } else { 3042 comm_n = PETSC_COMM_SELF; 3043 } 3044 } else { /* MAT_INITIAL_MATRIX */ 3045 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3046 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3047 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3048 comm_n = subcomm->comm; 3049 } 3050 /* flag to destroy *mat_n if not significative */ 3051 if (color) destroy_mat = PETSC_TRUE; 3052 } else { 3053 comm_n = comm; 3054 } 3055 3056 /* prepare send/receive buffers */ 3057 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3058 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3059 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3060 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3061 if (nis) { 3062 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3063 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3064 } 3065 3066 /* Get data from local matrices */ 3067 if (!isdense) { 3068 /* TODO: See below some guidelines on how to prepare the local buffers */ 3069 /* 3070 send_buffer_vals should contain the raw values of the local matrix 3071 send_buffer_idxs should contain: 3072 - MatType_PRIVATE type 3073 - PetscInt size_of_l2gmap 3074 - PetscInt global_row_indices[size_of_l2gmap] 3075 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3076 */ 3077 } else { 3078 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3079 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3080 ierr = PetscMalloc1((i+2),&send_buffer_idxs);CHKERRQ(ierr); 3081 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3082 send_buffer_idxs[1] = i; 3083 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3084 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3085 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3086 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3087 for (i=0;i<n_sends;i++) { 3088 ilengths_vals[is_indices[i]] = len*len; 3089 ilengths_idxs[is_indices[i]] = len+2; 3090 } 3091 } 3092 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3093 /* additional is (if any) */ 3094 if (nis) { 3095 PetscMPIInt psum; 3096 PetscInt j; 3097 for (j=0,psum=0;j<nis;j++) { 3098 PetscInt plen; 3099 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3100 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3101 psum += len+1; /* indices + lenght */ 3102 } 3103 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3104 for (j=0,psum=0;j<nis;j++) { 3105 PetscInt plen; 3106 const PetscInt *is_array_idxs; 3107 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3108 send_buffer_idxs_is[psum] = plen; 3109 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3110 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3111 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3112 psum += plen+1; /* indices + lenght */ 3113 } 3114 for (i=0;i<n_sends;i++) { 3115 ilengths_idxs_is[is_indices[i]] = psum; 3116 } 3117 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3118 } 3119 3120 buf_size_idxs = 0; 3121 buf_size_vals = 0; 3122 buf_size_idxs_is = 0; 3123 for (i=0;i<n_recvs;i++) { 3124 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3125 buf_size_vals += (PetscInt)olengths_vals[i]; 3126 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3127 } 3128 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3129 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3130 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3131 3132 /* get new tags for clean communications */ 3133 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3134 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3135 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3136 3137 /* allocate for requests */ 3138 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3139 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3140 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3141 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3142 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3143 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3144 3145 /* communications */ 3146 ptr_idxs = recv_buffer_idxs; 3147 ptr_vals = recv_buffer_vals; 3148 ptr_idxs_is = recv_buffer_idxs_is; 3149 for (i=0;i<n_recvs;i++) { 3150 source_dest = onodes[i]; 3151 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3152 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3153 ptr_idxs += olengths_idxs[i]; 3154 ptr_vals += olengths_vals[i]; 3155 if (nis) { 3156 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3157 ptr_idxs_is += olengths_idxs_is[i]; 3158 } 3159 } 3160 for (i=0;i<n_sends;i++) { 3161 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3162 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3163 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3164 if (nis) { 3165 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3166 } 3167 } 3168 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3169 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3170 3171 /* assemble new l2g map */ 3172 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3173 ptr_idxs = recv_buffer_idxs; 3174 buf_size_idxs = 0; 3175 for (i=0;i<n_recvs;i++) { 3176 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3177 ptr_idxs += olengths_idxs[i]; 3178 } 3179 ierr = PetscMalloc1(buf_size_idxs,&l2gmap_indices);CHKERRQ(ierr); 3180 ptr_idxs = recv_buffer_idxs; 3181 buf_size_idxs = 0; 3182 for (i=0;i<n_recvs;i++) { 3183 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3184 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3185 ptr_idxs += olengths_idxs[i]; 3186 } 3187 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3188 ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3189 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3190 3191 /* infer new local matrix type from received local matrices type */ 3192 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3193 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3194 if (n_recvs) { 3195 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3196 ptr_idxs = recv_buffer_idxs; 3197 for (i=0;i<n_recvs;i++) { 3198 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3199 new_local_type_private = MATAIJ_PRIVATE; 3200 break; 3201 } 3202 ptr_idxs += olengths_idxs[i]; 3203 } 3204 switch (new_local_type_private) { 3205 case MATDENSE_PRIVATE: 3206 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3207 new_local_type = MATSEQAIJ; 3208 bs = 1; 3209 } else { /* if I receive only 1 dense matrix */ 3210 new_local_type = MATSEQDENSE; 3211 bs = 1; 3212 } 3213 break; 3214 case MATAIJ_PRIVATE: 3215 new_local_type = MATSEQAIJ; 3216 bs = 1; 3217 break; 3218 case MATBAIJ_PRIVATE: 3219 new_local_type = MATSEQBAIJ; 3220 break; 3221 case MATSBAIJ_PRIVATE: 3222 new_local_type = MATSEQSBAIJ; 3223 break; 3224 default: 3225 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3226 break; 3227 } 3228 } else { /* by default, new_local_type is seqdense */ 3229 new_local_type = MATSEQDENSE; 3230 bs = 1; 3231 } 3232 3233 /* create MATIS object if needed */ 3234 if (reuse == MAT_INITIAL_MATRIX) { 3235 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3236 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3237 } else { 3238 /* it also destroys the local matrices */ 3239 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3240 } 3241 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3242 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3243 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3244 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3245 3246 /* set values */ 3247 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3248 ptr_vals = recv_buffer_vals; 3249 ptr_idxs = recv_buffer_idxs; 3250 for (i=0;i<n_recvs;i++) { 3251 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3252 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3253 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3254 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3256 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3257 } else { 3258 /* TODO */ 3259 } 3260 ptr_idxs += olengths_idxs[i]; 3261 ptr_vals += olengths_vals[i]; 3262 } 3263 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3267 3268 #if 0 3269 if (!restrict_comm) { /* check */ 3270 Vec lvec,rvec; 3271 PetscReal infty_error; 3272 3273 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3274 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3275 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3276 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3277 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3278 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3279 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3280 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3281 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3282 } 3283 #endif 3284 3285 /* assemble new additional is (if any) */ 3286 if (nis) { 3287 PetscInt **temp_idxs,*count_is,j,psum; 3288 3289 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3290 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3291 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3292 ptr_idxs = recv_buffer_idxs_is; 3293 psum = 0; 3294 for (i=0;i<n_recvs;i++) { 3295 for (j=0;j<nis;j++) { 3296 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3297 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3298 psum += plen; 3299 ptr_idxs += plen+1; /* shift pointer to received data */ 3300 } 3301 } 3302 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3303 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3304 for (i=1;i<nis;i++) { 3305 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3306 } 3307 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3308 ptr_idxs = recv_buffer_idxs_is; 3309 for (i=0;i<n_recvs;i++) { 3310 for (j=0;j<nis;j++) { 3311 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3312 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3313 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3314 ptr_idxs += plen+1; /* shift pointer to received data */ 3315 } 3316 } 3317 for (i=0;i<nis;i++) { 3318 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3319 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3320 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3321 } 3322 ierr = PetscFree(count_is);CHKERRQ(ierr); 3323 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3324 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3325 } 3326 /* free workspace */ 3327 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3328 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3329 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3330 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3331 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3332 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3333 if (isdense) { 3334 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3335 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3336 } else { 3337 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3338 } 3339 if (nis) { 3340 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3341 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3342 } 3343 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3344 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3345 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3346 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3347 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3348 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3349 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3350 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3351 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3352 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3353 ierr = PetscFree(onodes);CHKERRQ(ierr); 3354 if (nis) { 3355 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3356 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3357 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3358 } 3359 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3360 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3361 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3362 for (i=0;i<nis;i++) { 3363 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3364 } 3365 } 3366 PetscFunctionReturn(0); 3367 } 3368 3369 /* temporary hack into ksp private data structure */ 3370 #include <petsc-private/kspimpl.h> 3371 3372 #undef __FUNCT__ 3373 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3374 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3375 { 3376 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3377 PC_IS *pcis = (PC_IS*)pc->data; 3378 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3379 MatNullSpace CoarseNullSpace=NULL; 3380 ISLocalToGlobalMapping coarse_islg; 3381 IS coarse_is,*isarray; 3382 PetscInt i,im_active=-1,active_procs=-1; 3383 PetscInt nis,nisdofs,nisneu; 3384 PC pc_temp; 3385 PCType coarse_pc_type; 3386 KSPType coarse_ksp_type; 3387 PetscBool multilevel_requested,multilevel_allowed; 3388 PetscBool setsym,issym,isherm,isbddc,isnn,coarse_reuse; 3389 Mat t_coarse_mat_is; 3390 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3391 PetscMPIInt all_procs; 3392 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3393 PetscBool compute_vecs = PETSC_FALSE; 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 /* Assign global numbering to coarse dofs */ 3398 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3399 compute_vecs = PETSC_TRUE; 3400 PetscInt ocoarse_size; 3401 ocoarse_size = pcbddc->coarse_size; 3402 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3403 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3404 /* see if we can avoid some work */ 3405 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3406 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3407 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3408 coarse_reuse = PETSC_FALSE; 3409 } else { /* we can safely reuse already computed coarse matrix */ 3410 coarse_reuse = PETSC_TRUE; 3411 } 3412 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3413 coarse_reuse = PETSC_FALSE; 3414 } 3415 /* reset any subassembling information */ 3416 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3417 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3418 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3419 coarse_reuse = PETSC_TRUE; 3420 } 3421 3422 /* count "active" (i.e. with positive local size) and "void" processes */ 3423 im_active = !!(pcis->n); 3424 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3425 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3426 void_procs = all_procs-active_procs; 3427 csin_type_simple = PETSC_TRUE; 3428 if (pcbddc->current_level) { 3429 csin_ml = PETSC_TRUE; 3430 ncoarse_ml = void_procs; 3431 csin_ds = PETSC_TRUE; 3432 ncoarse_ds = void_procs; 3433 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3434 } else { 3435 csin_ml = PETSC_FALSE; 3436 ncoarse_ml = all_procs; 3437 if (void_procs) { 3438 csin_ds = PETSC_TRUE; 3439 ncoarse_ds = void_procs; 3440 csin_type_simple = PETSC_FALSE; 3441 } else { 3442 csin_ds = PETSC_FALSE; 3443 ncoarse_ds = all_procs; 3444 } 3445 } 3446 3447 /* 3448 test if we can go multilevel: three conditions must be satisfied: 3449 - we have not exceeded the number of levels requested 3450 - we can actually subassemble the active processes 3451 - we can find a suitable number of MPI processes where we can place the subassembled problem 3452 */ 3453 multilevel_allowed = PETSC_FALSE; 3454 multilevel_requested = PETSC_FALSE; 3455 if (pcbddc->current_level < pcbddc->max_levels) { 3456 multilevel_requested = PETSC_TRUE; 3457 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3458 multilevel_allowed = PETSC_FALSE; 3459 } else { 3460 multilevel_allowed = PETSC_TRUE; 3461 } 3462 } 3463 /* determine number of process partecipating to coarse solver */ 3464 if (multilevel_allowed) { 3465 ncoarse = ncoarse_ml; 3466 csin = csin_ml; 3467 } else { 3468 ncoarse = ncoarse_ds; 3469 csin = csin_ds; 3470 } 3471 3472 /* creates temporary l2gmap and IS for coarse indexes */ 3473 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3474 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3475 3476 /* creates temporary MATIS object for coarse matrix */ 3477 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3478 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3479 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3480 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3481 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3482 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3483 3484 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3485 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3486 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3487 const PetscInt *idxs; 3488 ISLocalToGlobalMapping tmap; 3489 3490 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3491 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3492 /* allocate space for temporary storage */ 3493 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3494 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3495 /* allocate for IS array */ 3496 nisdofs = pcbddc->n_ISForDofsLocal; 3497 nisneu = !!pcbddc->NeumannBoundariesLocal; 3498 nis = nisdofs + nisneu; 3499 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3500 /* dofs splitting */ 3501 for (i=0;i<nisdofs;i++) { 3502 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3503 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3504 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3505 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3506 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3507 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3509 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3510 } 3511 /* neumann boundaries */ 3512 if (pcbddc->NeumannBoundariesLocal) { 3513 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3514 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3515 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3516 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3517 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3518 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3519 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3520 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3521 } 3522 /* free memory */ 3523 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3524 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3525 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3526 } else { 3527 nis = 0; 3528 nisdofs = 0; 3529 nisneu = 0; 3530 isarray = NULL; 3531 } 3532 /* destroy no longer needed map */ 3533 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3534 3535 /* restrict on coarse candidates (if needed) */ 3536 coarse_mat_is = NULL; 3537 if (csin) { 3538 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3539 PetscInt j,tissize,*nisindices; 3540 PetscInt *coarse_candidates; 3541 const PetscInt* tisindices; 3542 /* get coarse candidates' ranks in pc communicator */ 3543 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3544 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3545 for (i=0,j=0;i<all_procs;i++) { 3546 if (!coarse_candidates[i]) { 3547 coarse_candidates[j]=i; 3548 j++; 3549 } 3550 } 3551 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3552 /* get a suitable subassembling pattern */ 3553 if (csin_type_simple) { 3554 PetscMPIInt rank; 3555 PetscInt issize,isidx; 3556 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3557 if (im_active) { 3558 issize = 1; 3559 isidx = (PetscInt)rank; 3560 } else { 3561 issize = 0; 3562 isidx = -1; 3563 } 3564 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3565 } else { 3566 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3567 } 3568 if (pcbddc->dbg_flag) { 3569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3571 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3573 for (i=0;i<j;i++) { 3574 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3575 } 3576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3578 } 3579 /* shift the pattern on coarse candidates */ 3580 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3581 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3582 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3583 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3584 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3585 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3586 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3587 } 3588 if (pcbddc->dbg_flag) { 3589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3590 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3591 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3593 } 3594 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3595 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3596 } else { 3597 if (pcbddc->dbg_flag) { 3598 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3599 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3600 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3601 } 3602 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3603 coarse_mat_is = t_coarse_mat_is; 3604 } 3605 3606 /* create local to global scatters for coarse problem */ 3607 if (compute_vecs) { 3608 PetscInt lrows; 3609 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3610 if (coarse_mat_is) { 3611 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3612 } else { 3613 lrows = 0; 3614 } 3615 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3616 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3617 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3618 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3619 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3620 } 3621 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3622 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3623 3624 /* set defaults for coarse KSP and PC */ 3625 if (multilevel_allowed) { 3626 coarse_ksp_type = KSPRICHARDSON; 3627 coarse_pc_type = PCBDDC; 3628 } else { 3629 coarse_ksp_type = KSPPREONLY; 3630 coarse_pc_type = PCREDUNDANT; 3631 } 3632 3633 /* print some info if requested */ 3634 if (pcbddc->dbg_flag) { 3635 if (!multilevel_allowed) { 3636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3637 if (multilevel_requested) { 3638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3639 } else if (pcbddc->max_levels) { 3640 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3641 } 3642 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3643 } 3644 } 3645 3646 /* create the coarse KSP object only once with defaults */ 3647 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3648 issym = PETSC_FALSE; 3649 isherm = PETSC_FALSE; 3650 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3651 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3652 if (coarse_mat_is) { 3653 MatReuse coarse_mat_reuse; 3654 PetscViewer dbg_viewer; 3655 if (pcbddc->dbg_flag) { 3656 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3657 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3658 } 3659 if (!pcbddc->coarse_ksp) { 3660 char prefix[256],str_level[16]; 3661 size_t len; 3662 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3663 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3664 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3665 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3666 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3667 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3668 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3669 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3670 /* prefix */ 3671 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3672 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3673 if (!pcbddc->current_level) { 3674 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3675 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3676 } else { 3677 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3678 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3679 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3680 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3681 *(prefix+len)='\0'; 3682 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3683 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3684 } 3685 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3686 /* allow user customization */ 3687 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3688 } 3689 3690 /* get some info after set from options */ 3691 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3692 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3693 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3694 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3695 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3696 isbddc = PETSC_FALSE; 3697 } 3698 3699 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3700 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3701 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3702 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3703 if (nisdofs) { 3704 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3705 for (i=0;i<nisdofs;i++) { 3706 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3707 } 3708 } 3709 if (nisneu) { 3710 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3711 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3712 } 3713 3714 /* assemble coarse matrix */ 3715 if (coarse_reuse) { 3716 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3717 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3718 coarse_mat_reuse = MAT_REUSE_MATRIX; 3719 } else { 3720 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3721 } 3722 if (isbddc || isnn) { 3723 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3724 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3725 if (pcbddc->dbg_flag) { 3726 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3727 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3728 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3729 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3730 } 3731 } 3732 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3733 } else { 3734 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3735 } 3736 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3737 3738 /* propagate symmetry info to coarse matrix */ 3739 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3740 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3741 3742 /* set operators */ 3743 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3744 if (pcbddc->dbg_flag) { 3745 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3746 } 3747 } else { /* processes non partecipating to coarse solver (if any) */ 3748 coarse_mat = 0; 3749 } 3750 ierr = PetscFree(isarray);CHKERRQ(ierr); 3751 3752 /* Compute coarse null space (special handling by BDDC only) */ 3753 if (pcbddc->NullSpace) { 3754 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3755 } 3756 3757 if (pcbddc->coarse_ksp) { 3758 Vec crhs,csol; 3759 PetscBool ispreonly; 3760 if (CoarseNullSpace) { 3761 if (isbddc) { 3762 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3763 } else { 3764 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3765 } 3766 } 3767 /* setup coarse ksp */ 3768 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3769 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3770 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3771 /* hack */ 3772 if (!csol) { 3773 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3774 } 3775 if (!crhs) { 3776 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3777 } 3778 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3779 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3780 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3781 KSP check_ksp; 3782 KSPType check_ksp_type; 3783 PC check_pc; 3784 Vec check_vec,coarse_vec; 3785 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3786 PetscInt its; 3787 PetscBool compute_eigs; 3788 PetscReal *eigs_r,*eigs_c; 3789 PetscInt neigs; 3790 3791 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3792 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3793 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3794 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3795 if (ispreonly) { 3796 check_ksp_type = KSPPREONLY; 3797 compute_eigs = PETSC_FALSE; 3798 } else { 3799 check_ksp_type = KSPGMRES; 3800 compute_eigs = PETSC_TRUE; 3801 } 3802 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3803 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3804 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3805 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3806 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3807 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3808 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3809 /* create random vec */ 3810 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3811 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3812 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3813 if (CoarseNullSpace) { 3814 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3815 } 3816 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3817 /* solve coarse problem */ 3818 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3819 if (CoarseNullSpace) { 3820 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3821 } 3822 /* set eigenvalue estimation if preonly has not been requested */ 3823 if (compute_eigs) { 3824 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3825 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3826 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3827 lambda_max = eigs_r[neigs-1]; 3828 lambda_min = eigs_r[0]; 3829 if (pcbddc->use_coarse_estimates) { 3830 if (lambda_max>lambda_min) { 3831 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3832 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3833 } 3834 } 3835 } 3836 3837 /* check coarse problem residual error */ 3838 if (pcbddc->dbg_flag) { 3839 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3840 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3841 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3842 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3843 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3844 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3845 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3846 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3847 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3848 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3849 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3850 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3851 if (compute_eigs) { 3852 PetscReal lambda_max_s,lambda_min_s; 3853 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3854 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3855 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3856 for (i=0;i<neigs;i++) { 3857 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3858 } 3859 } 3860 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3861 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3862 } 3863 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3864 if (compute_eigs) { 3865 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3866 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3867 } 3868 } 3869 } 3870 /* print additional info */ 3871 if (pcbddc->dbg_flag) { 3872 /* waits until all processes reaches this point */ 3873 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3874 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3875 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3876 } 3877 3878 /* free memory */ 3879 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3880 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3881 PetscFunctionReturn(0); 3882 } 3883 3884 #undef __FUNCT__ 3885 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3886 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3887 { 3888 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3889 PC_IS* pcis = (PC_IS*)pc->data; 3890 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3891 PetscInt i,coarse_size; 3892 PetscInt *local_primal_indices; 3893 PetscErrorCode ierr; 3894 3895 PetscFunctionBegin; 3896 /* Compute global number of coarse dofs */ 3897 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3898 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3899 } 3900 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3901 3902 /* check numbering */ 3903 if (pcbddc->dbg_flag) { 3904 PetscScalar coarsesum,*array; 3905 3906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3907 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3908 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3909 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3910 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3911 for (i=0;i<pcbddc->local_primal_size;i++) { 3912 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3913 } 3914 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3915 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3916 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3917 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3918 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3919 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3920 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3921 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3922 for (i=0;i<pcis->n;i++) { 3923 if (array[i] == 1.0) { 3924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3925 } 3926 } 3927 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3928 for (i=0;i<pcis->n;i++) { 3929 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3930 } 3931 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3932 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3933 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3934 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3935 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3936 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3937 if (pcbddc->dbg_flag > 1) { 3938 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3939 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3940 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3941 for (i=0;i<pcbddc->local_primal_size;i++) { 3942 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3943 } 3944 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3945 } 3946 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3947 } 3948 /* get back data */ 3949 *coarse_size_n = coarse_size; 3950 *local_primal_indices_n = local_primal_indices; 3951 PetscFunctionReturn(0); 3952 } 3953 3954 #undef __FUNCT__ 3955 #define __FUNCT__ "PCBDDCGlobalToLocal" 3956 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3957 { 3958 PC_IS* pcis = (PC_IS*)pc->data; 3959 IS localis_t; 3960 PetscInt i,lsize,*idxs; 3961 PetscScalar *vals; 3962 PetscErrorCode ierr; 3963 3964 PetscFunctionBegin; 3965 /* get dirichlet indices in local ordering exploiting local to global map */ 3966 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3967 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3968 for (i=0;i<lsize;i++) vals[i] = 1.0; 3969 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3970 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3971 if (idxs) { /* multilevel guard */ 3972 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3973 } 3974 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3975 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3976 ierr = PetscFree(vals);CHKERRQ(ierr); 3977 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3978 /* now compute dirichlet set in local ordering */ 3979 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3980 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3981 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3982 for (i=0,lsize=0;i<pcis->n;i++) { 3983 if (vals[i] == 1.0) { 3984 lsize++; 3985 } 3986 } 3987 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3988 for (i=0,lsize=0;i<pcis->n;i++) { 3989 if (vals[i] == 1.0) { 3990 idxs[lsize++] = i; 3991 } 3992 } 3993 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3994 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3995 *localis = localis_t; 3996 PetscFunctionReturn(0); 3997 } 3998