1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 unsymmetric_check = PETSC_TRUE; 623 } else { /* take references to already computed coarse basis */ 624 unsymmetric_check = PETSC_FALSE; 625 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 626 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 627 if (pcbddc->coarse_phi_D) { 628 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 629 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 630 } 631 } 632 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 633 /* Checking coarse_sub_mat and coarse basis functios */ 634 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 if (pcbddc->dbg_flag) { 637 Mat coarse_sub_mat; 638 Mat AUXMAT,TM1,TM2,TM3,TM4; 639 Mat coarse_phi_D,coarse_phi_B; 640 Mat coarse_psi_D,coarse_psi_B; 641 Mat A_II,A_BB,A_IB,A_BI; 642 MatType checkmattype=MATSEQAIJ; 643 PetscReal real_value; 644 645 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 651 if (unsymmetric_check) { 652 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 653 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 654 } 655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 656 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 660 if (unsymmetric_check) { 661 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 665 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } else { 674 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 675 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 676 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 677 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 679 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 680 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 682 } 683 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 687 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 688 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 694 for (i=0;i<pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 698 for (i=0;i<pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 if (unsymmetric_check) { 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 703 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 705 } 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 707 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 709 } 710 } 711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 722 if (unsymmetric_check) { 723 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 724 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 725 } 726 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 728 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 729 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 730 } 731 /* free memory */ 732 if (n_vertices) { 733 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 738 } 739 if (n_constraints) { 740 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 741 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 742 ierr = MatDestroy(&M1);CHKERRQ(ierr); 743 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 744 } 745 ierr = PetscFree(auxindices);CHKERRQ(ierr); 746 /* get back data */ 747 *coarse_submat_vals_n = coarse_submat_vals; 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 754 { 755 PC_IS* pcis = (PC_IS*)(pc->data); 756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 758 PetscBool issbaij,isseqaij; 759 /* manage repeated solves */ 760 MatReuse reuse; 761 MatStructure matstruct; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 770 reuse = MAT_INITIAL_MATRIX; 771 if (pc->setupcalled) { 772 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 773 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 774 if (matstruct == SAME_NONZERO_PATTERN) { 775 reuse = MAT_REUSE_MATRIX; 776 } else { 777 reuse = MAT_INITIAL_MATRIX; 778 } 779 } 780 if (reuse == MAT_INITIAL_MATRIX) { 781 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 785 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 786 } 787 788 /* transform local matrices if needed */ 789 if (pcbddc->use_change_of_basis) { 790 Mat change_mat_all; 791 PetscScalar *row_cmat_values; 792 PetscInt *row_cmat_indices; 793 PetscInt *nnz,*is_indices,*temp_indices; 794 PetscInt i,j,k,n_D,n_B; 795 796 /* Get Non-overlapping dimensions */ 797 n_B = pcis->n_B; 798 n_D = pcis->n-n_B; 799 800 /* compute nonzero structure of change of basis on all local nodes */ 801 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 804 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 806 k=1; 807 for (i=0;i<n_B;i++) { 808 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 nnz[is_indices[i]]=j; 810 if (k < j) k = j; 811 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 812 } 813 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 /* assemble change of basis matrix on the whole set of local dofs */ 815 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 816 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 817 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 818 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 819 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 820 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 821 for (i=0;i<n_D;i++) { 822 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 823 } 824 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 826 for (i=0;i<n_B;i++) { 827 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 829 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 830 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 831 } 832 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 835 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 836 if (isseqaij) { 837 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 838 } else { 839 Mat work_mat; 840 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 841 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 842 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 843 } 844 /* 845 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 846 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 847 */ 848 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 849 ierr = PetscFree(nnz);CHKERRQ(ierr); 850 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 851 } else { 852 /* without change of basis, the local matrix is unchanged */ 853 if (!pcbddc->local_mat) { 854 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 855 pcbddc->local_mat = matis->A; 856 } 857 } 858 859 /* get submatrices */ 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 863 if (!issbaij) { 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 865 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 866 } else { 867 Mat newmat; 868 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 870 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 871 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 878 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 879 { 880 PC_IS* pcis = (PC_IS*)(pc->data); 881 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 882 IS is_aux1,is_aux2; 883 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 884 PetscInt n_vertices,i,j,n_R,n_D,n_B; 885 PetscInt vbs,bs; 886 PetscBT bitmask; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 /* 891 No need to setup local scatters if 892 - primal space is unchanged 893 AND 894 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 895 AND 896 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 897 */ 898 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 899 PetscFunctionReturn(0); 900 } 901 /* destroy old objects */ 902 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 904 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 905 /* Set Non-overlapping dimensions */ 906 n_B = pcis->n_B; n_D = pcis->n - n_B; 907 n_vertices = pcbddc->n_actual_vertices; 908 /* create auxiliary bitmask */ 909 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 910 for (i=0;i<n_vertices;i++) { 911 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 912 } 913 914 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 915 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 916 for (i=0, n_R=0; i<pcis->n; i++) { 917 if (!PetscBTLookup(bitmask,i)) { 918 idx_R_local[n_R] = i; 919 n_R++; 920 } 921 } 922 923 /* Block code */ 924 vbs = 1; 925 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 926 if (bs>1 && !(n_vertices%bs)) { 927 PetscBool is_blocked = PETSC_TRUE; 928 PetscInt *vary; 929 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 930 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 931 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 932 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 933 for (i=0; i<n_vertices; i++) { 934 if (vary[i]!=0 && vary[i]!=bs) { 935 is_blocked = PETSC_FALSE; 936 break; 937 } 938 } 939 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 940 vbs = bs; 941 for (i=0;i<n_R/vbs;i++) { 942 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 943 } 944 } 945 ierr = PetscFree(vary);CHKERRQ(ierr); 946 } 947 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 948 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 949 950 /* print some info if requested */ 951 if (pcbddc->dbg_flag) { 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 960 } 961 962 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 963 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 965 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 966 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0; i<n_D; i++) { 968 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 969 } 970 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 971 for (i=0, j=0; i<n_R; i++) { 972 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 973 aux_array1[j++] = i; 974 } 975 } 976 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 977 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 978 for (i=0, j=0; i<n_B; i++) { 979 if (!PetscBTLookup(bitmask,is_indices[i])) { 980 aux_array2[j++] = i; 981 } 982 } 983 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 984 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 985 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 987 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 988 989 if (pcbddc->switch_static || pcbddc->dbg_flag) { 990 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 991 for (i=0, j=0; i<n_R; i++) { 992 if (PetscBTLookup(bitmask,idx_R_local[i])) { 993 aux_array1[j++] = i; 994 } 995 } 996 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 997 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 998 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 999 } 1000 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1001 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1008 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1009 { 1010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1011 PC_IS *pcis = (PC_IS*)pc->data; 1012 PC pc_temp; 1013 Mat A_RR; 1014 MatStructure matstruct; 1015 MatReuse reuse; 1016 PetscScalar m_one = -1.0; 1017 PetscReal value; 1018 PetscInt n_D,n_R,ibs,mbs; 1019 PetscBool use_exact,use_exact_reduced,issbaij; 1020 PetscErrorCode ierr; 1021 /* prefixes stuff */ 1022 char dir_prefix[256],neu_prefix[256],str_level[16]; 1023 size_t len; 1024 1025 PetscFunctionBegin; 1026 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1027 1028 /* compute prefixes */ 1029 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1030 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1031 if (!pcbddc->current_level) { 1032 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1034 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1035 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1038 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1039 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1040 len -= 15; /* remove "pc_bddc_coarse_" */ 1041 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1042 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1043 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1044 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1045 *(dir_prefix+len)='\0'; 1046 *(neu_prefix+len)='\0'; 1047 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1049 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1050 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1051 } 1052 1053 /* DIRICHLET PROBLEM */ 1054 /* Matrix for Dirichlet problem is pcis->A_II */ 1055 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1056 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1057 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1058 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1059 /* default */ 1060 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1061 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1062 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1063 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1064 if (issbaij) { 1065 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1066 } else { 1067 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1068 } 1069 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1070 /* Allow user's customization */ 1071 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1072 } 1073 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1074 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1075 if (!n_D) { 1076 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1077 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1078 } 1079 /* Set Up KSP for Dirichlet problem of BDDC */ 1080 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1081 /* set ksp_D into pcis data */ 1082 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1084 pcis->ksp_D = pcbddc->ksp_D; 1085 1086 /* NEUMANN PROBLEM */ 1087 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1088 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1089 if (pcbddc->ksp_R) { /* already created ksp */ 1090 PetscInt nn_R; 1091 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1092 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1093 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1094 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1095 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1096 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1097 reuse = MAT_INITIAL_MATRIX; 1098 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1099 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1100 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1101 reuse = MAT_INITIAL_MATRIX; 1102 } else { /* safe to reuse the matrix */ 1103 reuse = MAT_REUSE_MATRIX; 1104 } 1105 } 1106 /* last check */ 1107 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1108 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1109 reuse = MAT_INITIAL_MATRIX; 1110 } 1111 } else { /* first time, so we need to create the matrix */ 1112 reuse = MAT_INITIAL_MATRIX; 1113 } 1114 /* extract A_RR */ 1115 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1116 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1117 if (ibs != mbs) { 1118 Mat newmat; 1119 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1120 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1121 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1122 } else { 1123 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1124 } 1125 if (!pcbddc->ksp_R) { /* create object if not present */ 1126 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1127 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1128 /* default */ 1129 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1130 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1131 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1132 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1133 if (issbaij) { 1134 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1135 } else { 1136 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1137 } 1138 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1139 /* Allow user's customization */ 1140 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1141 } 1142 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1143 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1144 if (!n_R) { 1145 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1146 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1147 } 1148 /* Set Up KSP for Neumann problem of BDDC */ 1149 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1150 1151 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1152 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1153 /* Dirichlet */ 1154 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1155 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1158 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1159 /* need to be adapted? */ 1160 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1161 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1162 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1163 /* print info */ 1164 if (pcbddc->dbg_flag) { 1165 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1171 } 1172 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1173 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1174 } 1175 1176 /* Neumann */ 1177 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1178 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1181 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1182 /* need to be adapted? */ 1183 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1184 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1185 /* print info */ 1186 if (pcbddc->dbg_flag) { 1187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1189 } 1190 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1191 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1192 } 1193 } 1194 /* free Neumann problem's matrix */ 1195 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1201 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1202 { 1203 PetscErrorCode ierr; 1204 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1205 1206 PetscFunctionBegin; 1207 if (applytranspose) { 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1210 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1211 } 1212 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1213 } else { 1214 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1215 if (pcbddc->local_auxmat1) { 1216 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1217 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1218 } 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1226 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1227 { 1228 PetscErrorCode ierr; 1229 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1230 PC_IS* pcis = (PC_IS*) (pc->data); 1231 const PetscScalar zero = 0.0; 1232 1233 PetscFunctionBegin; 1234 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1235 if (applytranspose) { 1236 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1237 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1238 } else { 1239 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1240 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1241 } 1242 /* start communications from local primal nodes to rhs of coarse solver */ 1243 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1248 /* TODO remove null space when doing multilevel */ 1249 if (pcbddc->coarse_ksp) { 1250 if (applytranspose) { 1251 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1252 } else { 1253 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1254 } 1255 } 1256 /* start communications from coarse solver solution to local primal nodes */ 1257 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 1259 /* Local solution on R nodes */ 1260 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1261 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1263 if (pcbddc->switch_static) { 1264 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1266 } 1267 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1268 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1269 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 if (pcbddc->switch_static) { 1272 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 } 1275 1276 /* complete communications from coarse sol to local primal nodes */ 1277 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1278 1279 /* Sum contributions from two levels */ 1280 if (applytranspose) { 1281 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1282 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1283 } else { 1284 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1285 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1286 } 1287 PetscFunctionReturn(0); 1288 } 1289 1290 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1293 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1294 { 1295 PetscErrorCode ierr; 1296 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1297 PetscScalar *array,*array2; 1298 Vec from,to; 1299 1300 PetscFunctionBegin; 1301 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1302 from = pcbddc->coarse_vec; 1303 to = pcbddc->vec1_P; 1304 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1305 Vec tvec; 1306 PetscInt lsize; 1307 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1308 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1309 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1310 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1311 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1312 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1313 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1314 } 1315 } else { /* from local to global -> put data in coarse right hand side */ 1316 from = pcbddc->vec1_P; 1317 to = pcbddc->coarse_vec; 1318 } 1319 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1325 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1326 { 1327 PetscErrorCode ierr; 1328 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1329 PetscScalar *array,*array2; 1330 Vec from,to; 1331 1332 PetscFunctionBegin; 1333 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1334 from = pcbddc->coarse_vec; 1335 to = pcbddc->vec1_P; 1336 } else { /* from local to global -> put data in coarse right hand side */ 1337 from = pcbddc->vec1_P; 1338 to = pcbddc->coarse_vec; 1339 } 1340 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1341 if (smode == SCATTER_FORWARD) { 1342 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1343 Vec tvec; 1344 PetscInt lsize; 1345 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1346 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1347 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1348 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1349 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1350 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1351 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 /* uncomment for testing purposes */ 1358 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1361 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1362 { 1363 PetscErrorCode ierr; 1364 PC_IS* pcis = (PC_IS*)(pc->data); 1365 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1366 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1367 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1368 MatType impMatType=MATSEQAIJ; 1369 /* one and zero */ 1370 PetscScalar one=1.0,zero=0.0; 1371 /* space to store constraints and their local indices */ 1372 PetscScalar *temp_quadrature_constraint; 1373 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1374 /* iterators */ 1375 PetscInt i,j,k,total_counts,temp_start_ptr; 1376 /* stuff to store connected components stored in pcbddc->mat_graph */ 1377 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1378 PetscInt n_ISForFaces,n_ISForEdges; 1379 /* near null space stuff */ 1380 MatNullSpace nearnullsp; 1381 const Vec *nearnullvecs; 1382 Vec *localnearnullsp; 1383 PetscBool nnsp_has_cnst; 1384 PetscInt nnsp_size; 1385 PetscScalar *array; 1386 /* BLAS integers */ 1387 PetscBLASInt lwork,lierr; 1388 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1389 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1390 /* LAPACK working arrays for SVD or POD */ 1391 PetscBool skip_lapack; 1392 PetscScalar *work; 1393 PetscReal *singular_vals; 1394 #if defined(PETSC_USE_COMPLEX) 1395 PetscReal *rwork; 1396 #endif 1397 #if defined(PETSC_MISSING_LAPACK_GESVD) 1398 PetscBLASInt Blas_one_2=1; 1399 PetscScalar *temp_basis,*correlation_mat; 1400 #else 1401 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1402 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1403 #endif 1404 /* reuse */ 1405 PetscInt olocal_primal_size; 1406 PetscInt *oprimal_indices_local_idxs; 1407 /* change of basis */ 1408 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1409 PetscBool boolforchange,qr_needed; 1410 PetscBT touched,change_basis,qr_needed_idx; 1411 /* auxiliary stuff */ 1412 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1413 /* some quantities */ 1414 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1415 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1416 1417 1418 PetscFunctionBegin; 1419 /* Destroy Mat objects computed previously */ 1420 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1421 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1422 /* Get index sets for faces, edges and vertices from graph */ 1423 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1424 pcbddc->use_vertices = PETSC_TRUE; 1425 } 1426 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1427 /* HACK: provide functions to set change of basis */ 1428 if (!ISForVertices && pcbddc->NullSpace) { 1429 pcbddc->use_change_of_basis = PETSC_TRUE; 1430 pcbddc->use_change_on_faces = PETSC_FALSE; 1431 } 1432 /* print some info */ 1433 if (pcbddc->dbg_flag) { 1434 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1436 i = 0; 1437 if (ISForVertices) { 1438 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1439 } 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1443 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1444 } 1445 /* check if near null space is attached to global mat */ 1446 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1447 if (nearnullsp) { 1448 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1449 /* remove any stored info */ 1450 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1451 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1452 /* store information for BDDC solver reuse */ 1453 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1454 pcbddc->onearnullspace = nearnullsp; 1455 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1456 for (i=0;i<nnsp_size;i++) { 1457 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1458 } 1459 } else { /* if near null space is not provided BDDC uses constants by default */ 1460 nnsp_size = 0; 1461 nnsp_has_cnst = PETSC_TRUE; 1462 } 1463 /* get max number of constraints on a single cc */ 1464 max_constraints = nnsp_size; 1465 if (nnsp_has_cnst) max_constraints++; 1466 1467 /* 1468 Evaluate maximum storage size needed by the procedure 1469 - temp_indices will contain start index of each constraint stored as follows 1470 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1471 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1472 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1473 */ 1474 total_counts = n_ISForFaces+n_ISForEdges; 1475 total_counts *= max_constraints; 1476 n_vertices = 0; 1477 if (ISForVertices) { 1478 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1479 } 1480 total_counts += n_vertices; 1481 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1482 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1483 total_counts = 0; 1484 max_size_of_constraint = 0; 1485 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1486 if (i<n_ISForEdges) { 1487 used_IS = &ISForEdges[i]; 1488 } else { 1489 used_IS = &ISForFaces[i-n_ISForEdges]; 1490 } 1491 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1492 total_counts += j; 1493 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1494 } 1495 total_counts *= max_constraints; 1496 total_counts += n_vertices; 1497 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1498 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1499 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1500 /* get local part of global near null space vectors */ 1501 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1502 for (k=0;k<nnsp_size;k++) { 1503 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1504 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 } 1507 1508 /* whether or not to skip lapack calls */ 1509 skip_lapack = PETSC_TRUE; 1510 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1511 1512 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1513 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1514 PetscScalar temp_work; 1515 #if defined(PETSC_MISSING_LAPACK_GESVD) 1516 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1517 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1518 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1519 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1520 #if defined(PETSC_USE_COMPLEX) 1521 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1522 #endif 1523 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1524 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1525 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1526 lwork = -1; 1527 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1528 #if !defined(PETSC_USE_COMPLEX) 1529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1530 #else 1531 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1532 #endif 1533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1534 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1535 #else /* on missing GESVD */ 1536 /* SVD */ 1537 PetscInt max_n,min_n; 1538 max_n = max_size_of_constraint; 1539 min_n = max_constraints; 1540 if (max_size_of_constraint < max_constraints) { 1541 min_n = max_size_of_constraint; 1542 max_n = max_constraints; 1543 } 1544 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1545 #if defined(PETSC_USE_COMPLEX) 1546 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1547 #endif 1548 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1549 lwork = -1; 1550 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1551 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1552 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1553 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1554 #if !defined(PETSC_USE_COMPLEX) 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1556 #else 1557 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1558 #endif 1559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1560 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1561 #endif /* on missing GESVD */ 1562 /* Allocate optimal workspace */ 1563 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1564 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1565 } 1566 /* Now we can loop on constraining sets */ 1567 total_counts = 0; 1568 temp_indices[0] = 0; 1569 /* vertices */ 1570 if (ISForVertices) { 1571 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1572 if (nnsp_has_cnst) { /* consider all vertices */ 1573 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1574 for (i=0;i<n_vertices;i++) { 1575 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1576 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1577 total_counts++; 1578 } 1579 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1580 PetscBool used_vertex; 1581 for (i=0;i<n_vertices;i++) { 1582 used_vertex = PETSC_FALSE; 1583 k = 0; 1584 while (!used_vertex && k<nnsp_size) { 1585 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1586 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1587 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1588 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1589 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1590 total_counts++; 1591 used_vertex = PETSC_TRUE; 1592 } 1593 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1594 k++; 1595 } 1596 } 1597 } 1598 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1599 n_vertices = total_counts; 1600 } 1601 1602 /* edges and faces */ 1603 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1604 if (i<n_ISForEdges) { 1605 used_IS = &ISForEdges[i]; 1606 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1607 } else { 1608 used_IS = &ISForFaces[i-n_ISForEdges]; 1609 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1610 } 1611 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1612 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1613 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1614 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1615 /* change of basis should not be performed on local periodic nodes */ 1616 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1617 if (nnsp_has_cnst) { 1618 PetscScalar quad_value; 1619 temp_constraints++; 1620 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1621 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1622 for (j=0;j<size_of_constraint;j++) { 1623 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1624 } 1625 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1626 total_counts++; 1627 } 1628 for (k=0;k<nnsp_size;k++) { 1629 PetscReal real_value; 1630 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1631 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1632 for (j=0;j<size_of_constraint;j++) { 1633 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1634 } 1635 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1636 /* check if array is null on the connected component */ 1637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1638 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1639 if (real_value > 0.0) { /* keep indices and values */ 1640 temp_constraints++; 1641 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1642 total_counts++; 1643 } 1644 } 1645 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1646 valid_constraints = temp_constraints; 1647 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1648 if (!pcbddc->use_nnsp_true && temp_constraints) { 1649 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1650 1651 #if defined(PETSC_MISSING_LAPACK_GESVD) 1652 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1653 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1654 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1655 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1656 from that computed using LAPACKgesvd 1657 -> This is due to a different computation of eigenvectors in LAPACKheev 1658 -> The quality of the POD-computed basis will be the same */ 1659 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1660 /* Store upper triangular part of correlation matrix */ 1661 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1662 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1663 for (j=0;j<temp_constraints;j++) { 1664 for (k=0;k<j+1;k++) { 1665 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1666 } 1667 } 1668 /* compute eigenvalues and eigenvectors of correlation matrix */ 1669 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1670 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1671 #if !defined(PETSC_USE_COMPLEX) 1672 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1673 #else 1674 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1675 #endif 1676 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1677 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1678 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1679 j=0; 1680 while (j < temp_constraints && singular_vals[j] < tol) j++; 1681 total_counts=total_counts-j; 1682 valid_constraints = temp_constraints-j; 1683 /* scale and copy POD basis into used quadrature memory */ 1684 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1690 if (j<temp_constraints) { 1691 PetscInt ii; 1692 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1693 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1694 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1696 for (k=0;k<temp_constraints-j;k++) { 1697 for (ii=0;ii<size_of_constraint;ii++) { 1698 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1699 } 1700 } 1701 } 1702 #else /* on missing GESVD */ 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1705 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1706 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1707 #if !defined(PETSC_USE_COMPLEX) 1708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1709 #else 1710 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1711 #endif 1712 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1714 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1715 k = temp_constraints; 1716 if (k > size_of_constraint) k = size_of_constraint; 1717 j = 0; 1718 while (j < k && singular_vals[k-j-1] < tol) j++; 1719 total_counts = total_counts-temp_constraints+k-j; 1720 valid_constraints = k-j; 1721 #endif /* on missing GESVD */ 1722 } 1723 /* setting change_of_basis flag is safe now */ 1724 if (boolforchange) { 1725 for (j=0;j<valid_constraints;j++) { 1726 PetscBTSet(change_basis,total_counts-j-1); 1727 } 1728 } 1729 } 1730 /* free index sets of faces, edges and vertices */ 1731 for (i=0;i<n_ISForFaces;i++) { 1732 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1733 } 1734 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1735 for (i=0;i<n_ISForEdges;i++) { 1736 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1737 } 1738 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1739 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1740 /* map temp_indices_to_constraint in boundary numbering */ 1741 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1742 if (i != temp_indices[total_counts]) { 1743 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1744 } 1745 1746 /* free workspace */ 1747 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1748 ierr = PetscFree(work);CHKERRQ(ierr); 1749 #if defined(PETSC_USE_COMPLEX) 1750 ierr = PetscFree(rwork);CHKERRQ(ierr); 1751 #endif 1752 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1753 #if defined(PETSC_MISSING_LAPACK_GESVD) 1754 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1755 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1756 #endif 1757 } 1758 for (k=0;k<nnsp_size;k++) { 1759 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1760 } 1761 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1762 1763 /* set quantities in pcbddc data structure and store previous primal size */ 1764 /* n_vertices defines the number of subdomain corners in the primal space */ 1765 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1766 olocal_primal_size = pcbddc->local_primal_size; 1767 pcbddc->local_primal_size = total_counts; 1768 pcbddc->n_vertices = n_vertices; 1769 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1770 1771 /* Create constraint matrix */ 1772 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1773 /* so we need to set it up properly either with or without change of basis */ 1774 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1775 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1776 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1777 /* array to compute a local numbering of constraints : vertices first then constraints */ 1778 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1779 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1780 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1781 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1782 /* auxiliary stuff for basis change */ 1783 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1784 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1785 1786 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1787 total_primal_vertices=0; 1788 for (i=0;i<pcbddc->local_primal_size;i++) { 1789 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1790 if (size_of_constraint == 1) { 1791 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1792 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1793 aux_primal_minloc[total_primal_vertices]=0; 1794 total_primal_vertices++; 1795 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1796 PetscInt min_loc,min_index; 1797 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1798 /* find first untouched local node */ 1799 k = 0; 1800 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1801 min_index = global_indices[k]; 1802 min_loc = k; 1803 /* search the minimum among global nodes already untouched on the cc */ 1804 for (k=1;k<size_of_constraint;k++) { 1805 /* there can be more than one constraint on a single connected component */ 1806 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1807 min_index = global_indices[k]; 1808 min_loc = k; 1809 } 1810 } 1811 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1812 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1813 aux_primal_minloc[total_primal_vertices]=min_loc; 1814 total_primal_vertices++; 1815 } 1816 } 1817 /* determine if a QR strategy is needed for change of basis */ 1818 qr_needed = PETSC_FALSE; 1819 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1820 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1821 if (PetscBTLookup(change_basis,i)) { 1822 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1823 j = 0; 1824 for (k=0;k<size_of_constraint;k++) { 1825 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1826 j++; 1827 } 1828 } 1829 /* found more than one primal dof on the cc */ 1830 if (j > 1) { 1831 PetscBTSet(qr_needed_idx,i); 1832 qr_needed = PETSC_TRUE; 1833 } 1834 } 1835 } 1836 /* free workspace */ 1837 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1838 1839 /* permute indices in order to have a sorted set of vertices */ 1840 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1841 1842 /* nonzero structure of constraint matrix */ 1843 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1844 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1845 j=total_primal_vertices; 1846 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1847 if (!PetscBTLookup(change_basis,i)) { 1848 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1849 j++; 1850 } 1851 } 1852 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1853 ierr = PetscFree(nnz);CHKERRQ(ierr); 1854 /* set values in constraint matrix */ 1855 for (i=0;i<total_primal_vertices;i++) { 1856 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1857 } 1858 total_counts = total_primal_vertices; 1859 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1860 if (!PetscBTLookup(change_basis,i)) { 1861 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1862 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1863 total_counts++; 1864 } 1865 } 1866 /* assembling */ 1867 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1868 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1869 /* 1870 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1871 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1872 */ 1873 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1874 if (pcbddc->use_change_of_basis) { 1875 /* dual and primal dofs on a single cc */ 1876 PetscInt dual_dofs,primal_dofs; 1877 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1878 PetscInt primal_counter; 1879 /* working stuff for GEQRF */ 1880 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1881 PetscBLASInt lqr_work; 1882 /* working stuff for UNGQR */ 1883 PetscScalar *gqr_work,lgqr_work_t; 1884 PetscBLASInt lgqr_work; 1885 /* working stuff for TRTRS */ 1886 PetscScalar *trs_rhs; 1887 PetscBLASInt Blas_NRHS; 1888 /* pointers for values insertion into change of basis matrix */ 1889 PetscInt *start_rows,*start_cols; 1890 PetscScalar *start_vals; 1891 /* working stuff for values insertion */ 1892 PetscBT is_primal; 1893 1894 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1895 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1896 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1897 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1898 /* work arrays */ 1899 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1900 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1901 /* nonzeros per row */ 1902 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1903 if (PetscBTLookup(change_basis,i)) { 1904 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1905 if (PetscBTLookup(qr_needed_idx,i)) { 1906 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1907 } else { 1908 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1909 /* get local primal index on the cc */ 1910 j = 0; 1911 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1912 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1913 } 1914 } 1915 } 1916 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1917 ierr = PetscFree(nnz);CHKERRQ(ierr); 1918 /* Set initial identity in the matrix */ 1919 for (i=0;i<pcis->n_B;i++) { 1920 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1921 } 1922 1923 if (pcbddc->dbg_flag) { 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1926 } 1927 1928 1929 /* Now we loop on the constraints which need a change of basis */ 1930 /* 1931 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1932 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1933 1934 Basic blocks of change of basis matrix T computed 1935 1936 - Using the following block transformation if there is only a primal dof on the cc 1937 (in the example, primal dof is the last one of the edge in LOCAL ordering 1938 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1939 | 1 0 ... 0 1 | 1940 | 0 1 ... 0 1 | 1941 | ... | 1942 | 0 ... 1 1 | 1943 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1944 1945 - via QR decomposition of constraints otherwise 1946 */ 1947 if (qr_needed) { 1948 /* space to store Q */ 1949 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1950 /* first we issue queries for optimal work */ 1951 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1953 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1954 lqr_work = -1; 1955 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1956 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1957 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1958 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1959 lgqr_work = -1; 1960 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1963 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1964 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1965 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1967 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1968 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1969 /* array to store scaling factors for reflectors */ 1970 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1971 /* array to store rhs and solution of triangular solver */ 1972 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1973 /* allocating workspace for check */ 1974 if (pcbddc->dbg_flag) { 1975 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1976 } 1977 } 1978 /* array to store whether a node is primal or not */ 1979 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1980 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1981 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1982 if (i != total_primal_vertices) { 1983 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1984 } 1985 for (i=0;i<total_primal_vertices;i++) { 1986 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1987 } 1988 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1989 1990 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1991 /* -> using implicit ordering contained in temp_indices data */ 1992 total_counts = pcbddc->n_vertices; 1993 primal_counter = total_counts; 1994 while (total_counts<pcbddc->local_primal_size) { 1995 primal_dofs = 1; 1996 if (PetscBTLookup(change_basis,total_counts)) { 1997 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1998 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1999 primal_dofs++; 2000 } 2001 /* get constraint info */ 2002 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2003 dual_dofs = size_of_constraint-primal_dofs; 2004 2005 if (pcbddc->dbg_flag) { 2006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2007 } 2008 2009 if (primal_dofs > 1) { /* QR */ 2010 2011 /* copy quadrature constraints for change of basis check */ 2012 if (pcbddc->dbg_flag) { 2013 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2014 } 2015 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2016 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2017 2018 /* compute QR decomposition of constraints */ 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2021 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2022 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2023 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2024 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2025 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2026 2027 /* explictly compute R^-T */ 2028 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2029 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2030 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2033 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2035 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2036 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2037 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2038 2039 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2042 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2043 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2044 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2045 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2046 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2047 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2048 2049 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2050 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2051 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2059 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2060 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2061 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2062 2063 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2064 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2065 /* insert cols for primal dofs */ 2066 for (j=0;j<primal_dofs;j++) { 2067 start_vals = &qr_basis[j*size_of_constraint]; 2068 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2069 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2070 } 2071 /* insert cols for dual dofs */ 2072 for (j=0,k=0;j<dual_dofs;k++) { 2073 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2074 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2075 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2076 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2077 j++; 2078 } 2079 } 2080 2081 /* check change of basis */ 2082 if (pcbddc->dbg_flag) { 2083 PetscInt ii,jj; 2084 PetscBool valid_qr=PETSC_TRUE; 2085 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2091 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2092 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2093 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2094 for (jj=0;jj<size_of_constraint;jj++) { 2095 for (ii=0;ii<primal_dofs;ii++) { 2096 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2097 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2098 } 2099 } 2100 if (!valid_qr) { 2101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2102 for (jj=0;jj<size_of_constraint;jj++) { 2103 for (ii=0;ii<primal_dofs;ii++) { 2104 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2105 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2106 } 2107 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2108 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2109 } 2110 } 2111 } 2112 } else { 2113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2114 } 2115 } 2116 } else { /* simple transformation block */ 2117 PetscInt row,col; 2118 PetscScalar val; 2119 for (j=0;j<size_of_constraint;j++) { 2120 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2121 if (!PetscBTLookup(is_primal,row)) { 2122 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2123 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2124 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2125 } else { 2126 for (k=0;k<size_of_constraint;k++) { 2127 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2128 if (row != col) { 2129 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2130 } else { 2131 val = 1.0; 2132 } 2133 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2134 } 2135 } 2136 } 2137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2138 } 2139 /* increment primal counter */ 2140 primal_counter += primal_dofs; 2141 } else { 2142 if (pcbddc->dbg_flag) { 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2144 } 2145 } 2146 /* increment constraint counter total_counts */ 2147 total_counts += primal_dofs; 2148 } 2149 2150 /* free workspace */ 2151 if (qr_needed) { 2152 if (pcbddc->dbg_flag) { 2153 ierr = PetscFree(work);CHKERRQ(ierr); 2154 } 2155 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2156 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2157 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2158 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2159 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2160 } 2161 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2162 /* assembling */ 2163 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 /* 2166 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2167 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2168 */ 2169 } 2170 2171 /* get indices in local ordering for vertices and constraints */ 2172 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2173 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2174 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2175 } 2176 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2177 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2178 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2179 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2180 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2181 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2182 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2183 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2184 /* set quantities in PCBDDC data struct */ 2185 pcbddc->n_actual_vertices = i; 2186 /* check if a new primal space has been introduced */ 2187 pcbddc->new_primal_space_local = PETSC_TRUE; 2188 if (olocal_primal_size == pcbddc->local_primal_size) { 2189 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2190 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2191 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2192 } 2193 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2194 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2195 2196 /* flush dbg viewer */ 2197 if (pcbddc->dbg_flag) { 2198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2199 } 2200 2201 /* free workspace */ 2202 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2203 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2204 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2205 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2206 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2207 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2208 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2209 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2215 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2216 { 2217 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2218 PC_IS *pcis = (PC_IS*)pc->data; 2219 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2220 PetscInt ierr,i,vertex_size; 2221 PetscViewer viewer=pcbddc->dbg_viewer; 2222 2223 PetscFunctionBegin; 2224 /* Reset previously computed graph */ 2225 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2226 /* Init local Graph struct */ 2227 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2228 2229 /* Check validity of the csr graph passed in by the user */ 2230 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2231 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2232 } 2233 2234 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2235 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2236 Mat mat_adj; 2237 const PetscInt *xadj,*adjncy; 2238 PetscBool flg_row=PETSC_TRUE; 2239 2240 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2241 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2242 if (!flg_row) { 2243 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2244 } 2245 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2246 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2247 if (!flg_row) { 2248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2249 } 2250 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2251 } 2252 2253 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2254 vertex_size = 1; 2255 if (pcbddc->user_provided_isfordofs) { 2256 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2257 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2258 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2259 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2260 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2261 } 2262 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2263 pcbddc->n_ISForDofs = 0; 2264 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2265 } 2266 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2267 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2268 } else { 2269 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2270 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2271 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2272 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2273 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2274 } 2275 } 2276 } 2277 2278 /* Setup of Graph */ 2279 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2280 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2281 } 2282 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2283 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2284 } 2285 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2286 2287 /* Graph's connected components analysis */ 2288 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2289 2290 /* print some info to stdout */ 2291 if (pcbddc->dbg_flag) { 2292 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2293 } 2294 2295 /* mark topography has done */ 2296 pcbddc->recompute_topography = PETSC_FALSE; 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2302 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2303 { 2304 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2305 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 n = 0; 2310 vertices = 0; 2311 if (pcbddc->ConstraintMatrix) { 2312 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2313 for (i=0;i<local_primal_size;i++) { 2314 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2315 if (size_of_constraint == 1) n++; 2316 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2317 } 2318 if (vertices_idx) { 2319 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2320 n = 0; 2321 for (i=0;i<local_primal_size;i++) { 2322 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2323 if (size_of_constraint == 1) { 2324 vertices[n++]=row_cmat_indices[0]; 2325 } 2326 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2327 } 2328 } 2329 } 2330 *n_vertices = n; 2331 if (vertices_idx) *vertices_idx = vertices; 2332 PetscFunctionReturn(0); 2333 } 2334 2335 #undef __FUNCT__ 2336 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2337 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2338 { 2339 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2340 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2341 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2342 PetscBT touched; 2343 PetscErrorCode ierr; 2344 2345 /* This function assumes that the number of local constraints per connected component 2346 is not greater than the number of nodes defined for the connected component 2347 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2348 PetscFunctionBegin; 2349 n = 0; 2350 constraints_index = 0; 2351 if (pcbddc->ConstraintMatrix) { 2352 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2353 max_size_of_constraint = 0; 2354 for (i=0;i<local_primal_size;i++) { 2355 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2356 if (size_of_constraint > 1) { 2357 n++; 2358 } 2359 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2360 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2361 } 2362 if (constraints_idx) { 2363 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2364 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2365 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2366 n = 0; 2367 for (i=0;i<local_primal_size;i++) { 2368 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2369 if (size_of_constraint > 1) { 2370 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2371 /* find first untouched local node */ 2372 j = 0; 2373 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2374 min_index = row_cmat_global_indices[j]; 2375 min_loc = j; 2376 /* search the minimum among nodes not yet touched on the connected component 2377 since there can be more than one constraint on a single cc */ 2378 for (j=1;j<size_of_constraint;j++) { 2379 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2380 min_index = row_cmat_global_indices[j]; 2381 min_loc = j; 2382 } 2383 } 2384 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2385 constraints_index[n++] = row_cmat_indices[min_loc]; 2386 } 2387 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2388 } 2389 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2390 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2391 } 2392 } 2393 *n_constraints = n; 2394 if (constraints_idx) *constraints_idx = constraints_index; 2395 PetscFunctionReturn(0); 2396 } 2397 2398 /* the next two functions has been adapted from pcis.c */ 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "PCBDDCApplySchur" 2401 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2402 { 2403 PetscErrorCode ierr; 2404 PC_IS *pcis = (PC_IS*)(pc->data); 2405 2406 PetscFunctionBegin; 2407 if (!vec2_B) { vec2_B = v; } 2408 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2409 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2410 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2411 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2412 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 #undef __FUNCT__ 2417 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2418 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2419 { 2420 PetscErrorCode ierr; 2421 PC_IS *pcis = (PC_IS*)(pc->data); 2422 2423 PetscFunctionBegin; 2424 if (!vec2_B) { vec2_B = v; } 2425 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2426 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2427 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2428 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2429 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "PCBDDCSubsetNumbering" 2435 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2436 { 2437 Vec local_vec,global_vec; 2438 IS seqis,paris; 2439 VecScatter scatter_ctx; 2440 PetscScalar *array; 2441 PetscInt *temp_global_dofs; 2442 PetscScalar globalsum; 2443 PetscInt i,j,s; 2444 PetscInt nlocals,first_index,old_index,max_local; 2445 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2446 PetscMPIInt *dof_sizes,*dof_displs; 2447 PetscBool first_found; 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 /* mpi buffers */ 2452 MPI_Comm_size(comm,&size_prec_comm); 2453 MPI_Comm_rank(comm,&rank_prec_comm); 2454 j = ( !rank_prec_comm ? size_prec_comm : 0); 2455 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2456 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2457 /* get maximum size of subset */ 2458 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2459 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2460 max_local = 0; 2461 if (n_local_dofs) { 2462 max_local = temp_global_dofs[0]; 2463 for (i=1;i<n_local_dofs;i++) { 2464 if (max_local < temp_global_dofs[i] ) { 2465 max_local = temp_global_dofs[i]; 2466 } 2467 } 2468 } 2469 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2470 max_global++; 2471 max_local = 0; 2472 if (n_local_dofs) { 2473 max_local = local_dofs[0]; 2474 for (i=1;i<n_local_dofs;i++) { 2475 if (max_local < local_dofs[i] ) { 2476 max_local = local_dofs[i]; 2477 } 2478 } 2479 } 2480 max_local++; 2481 /* allocate workspace */ 2482 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2483 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2484 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2485 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2486 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2487 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2488 /* create scatter */ 2489 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2490 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2491 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2492 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2493 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2494 /* init array */ 2495 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2496 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2497 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2498 if (local_dofs_mult) { 2499 for (i=0;i<n_local_dofs;i++) { 2500 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2501 } 2502 } else { 2503 for (i=0;i<n_local_dofs;i++) { 2504 array[local_dofs[i]]=1.0; 2505 } 2506 } 2507 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2508 /* scatter into global vec and get total number of global dofs */ 2509 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2512 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2513 /* Fill global_vec with cumulative function for global numbering */ 2514 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2515 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2516 nlocals = 0; 2517 first_index = -1; 2518 first_found = PETSC_FALSE; 2519 for (i=0;i<s;i++) { 2520 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2521 first_found = PETSC_TRUE; 2522 first_index = i; 2523 } 2524 nlocals += (PetscInt)PetscRealPart(array[i]); 2525 } 2526 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2527 if (!rank_prec_comm) { 2528 dof_displs[0]=0; 2529 for (i=1;i<size_prec_comm;i++) { 2530 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2531 } 2532 } 2533 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2534 if (first_found) { 2535 array[first_index] += (PetscScalar)nlocals; 2536 old_index = first_index; 2537 for (i=first_index+1;i<s;i++) { 2538 if (PetscRealPart(array[i]) > 0.0) { 2539 array[i] += array[old_index]; 2540 old_index = i; 2541 } 2542 } 2543 } 2544 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2545 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2546 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2547 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2548 /* get global ordering of local dofs */ 2549 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2550 if (local_dofs_mult) { 2551 for (i=0;i<n_local_dofs;i++) { 2552 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2553 } 2554 } else { 2555 for (i=0;i<n_local_dofs;i++) { 2556 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2557 } 2558 } 2559 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2560 /* free workspace */ 2561 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2562 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2563 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2564 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2565 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2566 /* return pointer to global ordering of local dofs */ 2567 *global_numbering_subset = temp_global_dofs; 2568 PetscFunctionReturn(0); 2569 } 2570 2571 #undef __FUNCT__ 2572 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2573 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2574 { 2575 PetscInt i,j; 2576 PetscScalar *alphas; 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 /* this implements stabilized Gram-Schmidt */ 2581 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2582 for (i=0;i<n;i++) { 2583 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2584 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2585 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2586 } 2587 ierr = PetscFree(alphas);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 /* TODO 2592 - now preallocation is done assuming SEQDENSE local matrices 2593 */ 2594 #undef __FUNCT__ 2595 #define __FUNCT__ "MatISGetMPIXAIJ" 2596 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2597 { 2598 Mat new_mat; 2599 Mat_IS *matis = (Mat_IS*)(mat->data); 2600 /* info on mat */ 2601 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2602 PetscInt bs,rows,cols; 2603 PetscInt lrows,lcols; 2604 PetscInt local_rows,local_cols; 2605 PetscBool isdense; 2606 /* values insertion */ 2607 PetscScalar *array; 2608 PetscInt *local_indices,*global_indices; 2609 /* work */ 2610 PetscInt i,j,index_row; 2611 PetscErrorCode ierr; 2612 2613 PetscFunctionBegin; 2614 /* MISSING CHECKS 2615 - rectangular case not covered (it is not allowed by MATIS) 2616 */ 2617 /* get info from mat */ 2618 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2619 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2620 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2621 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2622 2623 /* work */ 2624 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2625 for (i=0;i<local_rows;i++) local_indices[i]=i; 2626 /* map indices of local mat to global values */ 2627 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2628 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2629 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2630 2631 if (reuse==MAT_INITIAL_MATRIX) { 2632 Vec vec_dnz,vec_onz; 2633 PetscScalar *my_dnz,*my_onz; 2634 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2635 PetscInt index_col,owner; 2636 PetscMPIInt nsubdomains; 2637 2638 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2639 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2640 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2641 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2642 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2643 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2644 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2645 2646 /* 2647 preallocation 2648 */ 2649 2650 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2651 /* 2652 Some vectors are needed to sum up properly on shared interface dofs. 2653 Preallocation macros cannot do the job. 2654 Note that preallocation is not exact, since it overestimates nonzeros 2655 */ 2656 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2657 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2658 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2659 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2660 /* All processes need to compute entire row ownership */ 2661 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2662 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2663 for (i=0;i<nsubdomains;i++) { 2664 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2665 row_ownership[j]=i; 2666 } 2667 } 2668 2669 /* 2670 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2671 then, they will be summed up properly. This way, preallocation is always sufficient 2672 */ 2673 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2674 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2675 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2676 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2677 for (i=0;i<local_rows;i++) { 2678 index_row = global_indices[i]; 2679 for (j=i;j<local_rows;j++) { 2680 owner = row_ownership[index_row]; 2681 index_col = global_indices[j]; 2682 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2683 my_dnz[i] += 1.0; 2684 } else { /* offdiag block */ 2685 my_onz[i] += 1.0; 2686 } 2687 /* same as before, interchanging rows and cols */ 2688 if (i != j) { 2689 owner = row_ownership[index_col]; 2690 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2691 my_dnz[j] += 1.0; 2692 } else { 2693 my_onz[j] += 1.0; 2694 } 2695 } 2696 } 2697 } 2698 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2699 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2700 if (local_rows) { /* multilevel guard */ 2701 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2702 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2703 } 2704 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2705 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2706 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2707 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2708 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2709 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2710 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2711 2712 /* set computed preallocation in dnz and onz */ 2713 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2714 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2715 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2716 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2717 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2718 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2719 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2720 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2721 2722 /* Resize preallocation if overestimated */ 2723 for (i=0;i<lrows;i++) { 2724 dnz[i] = PetscMin(dnz[i],lcols); 2725 onz[i] = PetscMin(onz[i],cols-lcols); 2726 } 2727 /* set preallocation */ 2728 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2729 for (i=0;i<lrows/bs;i++) { 2730 dnz[i] = dnz[i*bs]/bs; 2731 onz[i] = onz[i*bs]/bs; 2732 } 2733 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2734 for (i=0;i<lrows/bs;i++) { 2735 dnz[i] = dnz[i]-i; 2736 } 2737 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2738 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2739 *M = new_mat; 2740 } else { 2741 PetscInt mbs,mrows,mcols; 2742 /* some checks */ 2743 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2744 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2745 if (mrows != rows) { 2746 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2747 } 2748 if (mrows != rows) { 2749 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2750 } 2751 if (mbs != bs) { 2752 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2753 } 2754 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2755 } 2756 /* set local to global mappings */ 2757 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2758 /* Set values */ 2759 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2760 if (isdense) { /* special case for dense local matrices */ 2761 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2762 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2763 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2764 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2765 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2766 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2767 } else { /* very basic values insertion for all other matrix types */ 2768 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2769 for (i=0;i<local_rows;i++) { 2770 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2771 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2772 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2773 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2774 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2775 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2776 } 2777 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2778 } 2779 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2780 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2781 if (isdense) { 2782 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2783 } 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatISGetSubassemblingPattern" 2789 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2790 { 2791 Mat subdomain_adj; 2792 IS new_ranks,ranks_send_to; 2793 MatPartitioning partitioner; 2794 Mat_IS *matis; 2795 PetscInt n_neighs,*neighs,*n_shared,**shared; 2796 PetscInt prank; 2797 PetscMPIInt size,rank,color; 2798 PetscInt *xadj,*adjncy,*oldranks; 2799 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2800 PetscInt i,j,local_size,threshold=0; 2801 PetscErrorCode ierr; 2802 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2803 PetscSubcomm subcomm; 2804 2805 PetscFunctionBegin; 2806 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2807 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2808 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2809 2810 /* Get info on mapping */ 2811 matis = (Mat_IS*)(mat->data); 2812 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2813 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2814 2815 /* build local CSR graph of subdomains' connectivity */ 2816 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2817 xadj[0] = 0; 2818 xadj[1] = PetscMax(n_neighs-1,0); 2819 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2820 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2821 2822 if (threshold) { 2823 PetscInt* count,min_threshold; 2824 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2825 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2826 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2827 for (j=0;j<n_shared[i];j++) { 2828 count[shared[i][j]] += 1; 2829 } 2830 } 2831 /* adapt threshold since we dont want to lose significant connections */ 2832 min_threshold = n_neighs; 2833 for (i=1;i<n_neighs;i++) { 2834 for (j=0;j<n_shared[i];j++) { 2835 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2836 } 2837 } 2838 threshold = PetscMax(min_threshold+1,threshold); 2839 xadj[1] = 0; 2840 for (i=1;i<n_neighs;i++) { 2841 for (j=0;j<n_shared[i];j++) { 2842 if (count[shared[i][j]] < threshold) { 2843 adjncy[xadj[1]] = neighs[i]; 2844 adjncy_wgt[xadj[1]] = n_shared[i]; 2845 xadj[1]++; 2846 break; 2847 } 2848 } 2849 } 2850 ierr = PetscFree(count);CHKERRQ(ierr); 2851 } else { 2852 if (xadj[1]) { 2853 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2854 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2855 } 2856 } 2857 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2858 if (use_square) { 2859 for (i=0;i<xadj[1];i++) { 2860 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2861 } 2862 } 2863 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2864 2865 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2866 2867 /* 2868 Restrict work on active processes only. 2869 */ 2870 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2871 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2872 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2873 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2874 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2875 if (color) { 2876 ierr = PetscFree(xadj);CHKERRQ(ierr); 2877 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2878 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2879 } else { 2880 PetscInt coarsening_ratio; 2881 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2882 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2883 prank = rank; 2884 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2885 /* 2886 for (i=0;i<size;i++) { 2887 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2888 } 2889 */ 2890 for (i=0;i<xadj[1];i++) { 2891 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2892 } 2893 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2894 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2895 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2896 2897 /* Partition */ 2898 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2899 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2900 if (use_vwgt) { 2901 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2902 v_wgt[0] = local_size; 2903 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2904 } 2905 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2906 coarsening_ratio = size/n_subdomains; 2907 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2908 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2909 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2910 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2911 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2912 2913 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2914 if (contiguous) { 2915 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2916 } else { 2917 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2918 } 2919 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2920 /* clean up */ 2921 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2922 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2923 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2924 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2925 } 2926 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2927 2928 /* assemble parallel IS for sends */ 2929 i = 1; 2930 if (color) i=0; 2931 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2932 2933 /* get back IS */ 2934 *is_sends = ranks_send_to; 2935 PetscFunctionReturn(0); 2936 } 2937 2938 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatISSubassemble" 2942 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2943 { 2944 Mat local_mat; 2945 Mat_IS *matis; 2946 IS is_sends_internal; 2947 PetscInt rows,cols; 2948 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2949 PetscBool ismatis,isdense,destroy_mat; 2950 ISLocalToGlobalMapping l2gmap; 2951 PetscInt* l2gmap_indices; 2952 const PetscInt* is_indices; 2953 MatType new_local_type; 2954 /* buffers */ 2955 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2956 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2957 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2958 /* MPI */ 2959 MPI_Comm comm,comm_n; 2960 PetscSubcomm subcomm; 2961 PetscMPIInt n_sends,n_recvs,commsize; 2962 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2963 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2964 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2965 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2966 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2967 PetscErrorCode ierr; 2968 2969 PetscFunctionBegin; 2970 /* TODO: add missing checks */ 2971 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2972 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2973 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2974 PetscValidLogicalCollectiveInt(mat,nis,7); 2975 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2976 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2977 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2978 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2979 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2980 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2981 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2982 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2983 PetscInt mrows,mcols,mnrows,mncols; 2984 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2985 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2986 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2987 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2988 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2989 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2990 } 2991 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2992 PetscValidLogicalCollectiveInt(mat,bs,0); 2993 /* prepare IS for sending if not provided */ 2994 if (!is_sends) { 2995 PetscBool pcontig = PETSC_TRUE; 2996 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2997 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2998 } else { 2999 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3000 is_sends_internal = is_sends; 3001 } 3002 3003 /* get pointer of MATIS data */ 3004 matis = (Mat_IS*)mat->data; 3005 3006 /* get comm */ 3007 comm = PetscObjectComm((PetscObject)mat); 3008 3009 /* compute number of sends */ 3010 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3011 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3012 3013 /* compute number of receives */ 3014 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3015 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 3016 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3017 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3018 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3019 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3020 ierr = PetscFree(iflags);CHKERRQ(ierr); 3021 3022 /* restrict comm if requested */ 3023 subcomm = 0; 3024 destroy_mat = PETSC_FALSE; 3025 if (restrict_comm) { 3026 PetscMPIInt color,rank,subcommsize; 3027 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3028 color = 0; 3029 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3030 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3031 subcommsize = commsize - subcommsize; 3032 /* check if reuse has been requested */ 3033 if (reuse == MAT_REUSE_MATRIX) { 3034 if (*mat_n) { 3035 PetscMPIInt subcommsize2; 3036 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3037 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3038 comm_n = PetscObjectComm((PetscObject)*mat_n); 3039 } else { 3040 comm_n = PETSC_COMM_SELF; 3041 } 3042 } else { /* MAT_INITIAL_MATRIX */ 3043 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3044 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3045 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3046 comm_n = subcomm->comm; 3047 } 3048 /* flag to destroy *mat_n if not significative */ 3049 if (color) destroy_mat = PETSC_TRUE; 3050 } else { 3051 comm_n = comm; 3052 } 3053 3054 /* prepare send/receive buffers */ 3055 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 3056 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3057 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 3058 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3059 if (nis) { 3060 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3061 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3062 } 3063 3064 /* Get data from local matrices */ 3065 if (!isdense) { 3066 /* TODO: See below some guidelines on how to prepare the local buffers */ 3067 /* 3068 send_buffer_vals should contain the raw values of the local matrix 3069 send_buffer_idxs should contain: 3070 - MatType_PRIVATE type 3071 - PetscInt size_of_l2gmap 3072 - PetscInt global_row_indices[size_of_l2gmap] 3073 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3074 */ 3075 } else { 3076 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3077 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3078 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 3079 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3080 send_buffer_idxs[1] = i; 3081 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3082 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3083 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3084 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3085 for (i=0;i<n_sends;i++) { 3086 ilengths_vals[is_indices[i]] = len*len; 3087 ilengths_idxs[is_indices[i]] = len+2; 3088 } 3089 } 3090 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3091 /* additional is (if any) */ 3092 if (nis) { 3093 PetscMPIInt psum; 3094 PetscInt j; 3095 for (j=0,psum=0;j<nis;j++) { 3096 PetscInt plen; 3097 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3098 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3099 psum += len+1; /* indices + lenght */ 3100 } 3101 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3102 for (j=0,psum=0;j<nis;j++) { 3103 PetscInt plen; 3104 const PetscInt *is_array_idxs; 3105 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3106 send_buffer_idxs_is[psum] = plen; 3107 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3108 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3109 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3110 psum += plen+1; /* indices + lenght */ 3111 } 3112 for (i=0;i<n_sends;i++) { 3113 ilengths_idxs_is[is_indices[i]] = psum; 3114 } 3115 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3116 } 3117 3118 buf_size_idxs = 0; 3119 buf_size_vals = 0; 3120 buf_size_idxs_is = 0; 3121 for (i=0;i<n_recvs;i++) { 3122 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3123 buf_size_vals += (PetscInt)olengths_vals[i]; 3124 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3125 } 3126 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3127 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3128 ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr); 3129 3130 /* get new tags for clean communications */ 3131 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3133 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3134 3135 /* allocate for requests */ 3136 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3137 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3138 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr); 3139 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3140 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3141 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr); 3142 3143 /* communications */ 3144 ptr_idxs = recv_buffer_idxs; 3145 ptr_vals = recv_buffer_vals; 3146 ptr_idxs_is = recv_buffer_idxs_is; 3147 for (i=0;i<n_recvs;i++) { 3148 source_dest = onodes[i]; 3149 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3150 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3151 ptr_idxs += olengths_idxs[i]; 3152 ptr_vals += olengths_vals[i]; 3153 if (nis) { 3154 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3155 ptr_idxs_is += olengths_idxs_is[i]; 3156 } 3157 } 3158 for (i=0;i<n_sends;i++) { 3159 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3160 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3161 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3162 if (nis) { 3163 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3164 } 3165 } 3166 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3167 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3168 3169 /* assemble new l2g map */ 3170 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3171 ptr_idxs = recv_buffer_idxs; 3172 buf_size_idxs = 0; 3173 for (i=0;i<n_recvs;i++) { 3174 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3175 ptr_idxs += olengths_idxs[i]; 3176 } 3177 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3178 ptr_idxs = recv_buffer_idxs; 3179 buf_size_idxs = 0; 3180 for (i=0;i<n_recvs;i++) { 3181 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3182 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3183 ptr_idxs += olengths_idxs[i]; 3184 } 3185 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3186 ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3187 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3188 3189 /* infer new local matrix type from received local matrices type */ 3190 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3191 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3192 if (n_recvs) { 3193 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3194 ptr_idxs = recv_buffer_idxs; 3195 for (i=0;i<n_recvs;i++) { 3196 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3197 new_local_type_private = MATAIJ_PRIVATE; 3198 break; 3199 } 3200 ptr_idxs += olengths_idxs[i]; 3201 } 3202 switch (new_local_type_private) { 3203 case MATDENSE_PRIVATE: 3204 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3205 new_local_type = MATSEQAIJ; 3206 bs = 1; 3207 } else { /* if I receive only 1 dense matrix */ 3208 new_local_type = MATSEQDENSE; 3209 bs = 1; 3210 } 3211 break; 3212 case MATAIJ_PRIVATE: 3213 new_local_type = MATSEQAIJ; 3214 bs = 1; 3215 break; 3216 case MATBAIJ_PRIVATE: 3217 new_local_type = MATSEQBAIJ; 3218 break; 3219 case MATSBAIJ_PRIVATE: 3220 new_local_type = MATSEQSBAIJ; 3221 break; 3222 default: 3223 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3224 break; 3225 } 3226 } else { /* by default, new_local_type is seqdense */ 3227 new_local_type = MATSEQDENSE; 3228 bs = 1; 3229 } 3230 3231 /* create MATIS object if needed */ 3232 if (reuse == MAT_INITIAL_MATRIX) { 3233 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3234 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3235 } else { 3236 /* it also destroys the local matrices */ 3237 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3238 } 3239 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3240 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3241 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3242 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3243 3244 /* set values */ 3245 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3246 ptr_vals = recv_buffer_vals; 3247 ptr_idxs = recv_buffer_idxs; 3248 for (i=0;i<n_recvs;i++) { 3249 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3250 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3251 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3252 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3253 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3254 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3255 } else { 3256 /* TODO */ 3257 } 3258 ptr_idxs += olengths_idxs[i]; 3259 ptr_vals += olengths_vals[i]; 3260 } 3261 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3262 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3263 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 3266 #if 1 3267 if (!restrict_comm) { /* check */ 3268 Vec lvec,rvec; 3269 PetscReal infty_error; 3270 3271 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3272 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3273 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3274 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3275 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3276 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3277 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3278 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3279 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3280 } 3281 #endif 3282 3283 /* assemble new additional is (if any) */ 3284 if (nis) { 3285 PetscInt **temp_idxs,*count_is,j,psum; 3286 3287 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3288 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3289 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3290 ptr_idxs = recv_buffer_idxs_is; 3291 psum = 0; 3292 for (i=0;i<n_recvs;i++) { 3293 for (j=0;j<nis;j++) { 3294 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3295 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3296 psum += plen; 3297 ptr_idxs += plen+1; /* shift pointer to received data */ 3298 } 3299 } 3300 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3301 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3302 for (i=1;i<nis;i++) { 3303 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3304 } 3305 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3306 ptr_idxs = recv_buffer_idxs_is; 3307 for (i=0;i<n_recvs;i++) { 3308 for (j=0;j<nis;j++) { 3309 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3310 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3311 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3312 ptr_idxs += plen+1; /* shift pointer to received data */ 3313 } 3314 } 3315 for (i=0;i<nis;i++) { 3316 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3317 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3318 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3319 } 3320 ierr = PetscFree(count_is);CHKERRQ(ierr); 3321 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3322 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3323 } 3324 /* free workspace */ 3325 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3326 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3327 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3328 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3329 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3330 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3331 if (isdense) { 3332 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3333 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3334 } else { 3335 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3336 } 3337 if (nis) { 3338 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3339 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3340 } 3341 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3342 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3343 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3344 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3345 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3346 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3347 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3348 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3349 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3350 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3351 ierr = PetscFree(onodes);CHKERRQ(ierr); 3352 if (nis) { 3353 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3354 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3355 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3356 } 3357 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3358 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3359 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3360 for (i=0;i<nis;i++) { 3361 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3362 } 3363 } 3364 PetscFunctionReturn(0); 3365 } 3366 3367 /* temporary hack into ksp private data structure */ 3368 #include <petsc-private/kspimpl.h> 3369 3370 #undef __FUNCT__ 3371 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3372 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3373 { 3374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3375 PC_IS *pcis = (PC_IS*)pc->data; 3376 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3377 MatNullSpace CoarseNullSpace=NULL; 3378 ISLocalToGlobalMapping coarse_islg; 3379 IS coarse_is,*isarray; 3380 PetscInt i,im_active=-1,active_procs=-1; 3381 PetscInt nis,nisdofs,nisneu; 3382 PC pc_temp; 3383 PCType coarse_pc_type; 3384 KSPType coarse_ksp_type; 3385 PetscBool multilevel_requested,multilevel_allowed; 3386 PetscBool setsym,issym,isherm,isbddc,isnn,coarse_reuse; 3387 MatStructure matstruct; 3388 Mat t_coarse_mat_is; 3389 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3390 PetscMPIInt all_procs; 3391 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3392 PetscErrorCode ierr; 3393 3394 PetscFunctionBegin; 3395 /* Assign global numbering to coarse dofs */ 3396 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3397 PetscInt ocoarse_size; 3398 ocoarse_size = pcbddc->coarse_size; 3399 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3400 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3401 /* see if we can avoid some work */ 3402 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3403 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3404 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3405 coarse_reuse = PETSC_FALSE; 3406 } else { /* we can safely reuse already computed coarse matrix */ 3407 coarse_reuse = PETSC_TRUE; 3408 } 3409 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3410 coarse_reuse = PETSC_FALSE; 3411 } 3412 /* reset any subassembling information */ 3413 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3414 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3415 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3416 coarse_reuse = PETSC_TRUE; 3417 } 3418 3419 /* count "active" (i.e. with positive local size) and "void" processes */ 3420 im_active = !!(pcis->n); 3421 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3422 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3423 void_procs = all_procs-active_procs; 3424 csin_type_simple = PETSC_TRUE; 3425 if (pcbddc->current_level) { 3426 csin_ml = PETSC_TRUE; 3427 ncoarse_ml = void_procs; 3428 csin_ds = PETSC_TRUE; 3429 ncoarse_ds = void_procs; 3430 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3431 } else { 3432 csin_ml = PETSC_FALSE; 3433 ncoarse_ml = all_procs; 3434 if (void_procs) { 3435 csin_ds = PETSC_TRUE; 3436 ncoarse_ds = void_procs; 3437 csin_type_simple = PETSC_FALSE; 3438 } else { 3439 csin_ds = PETSC_FALSE; 3440 ncoarse_ds = all_procs; 3441 } 3442 } 3443 3444 /* 3445 test if we can go multilevel: three conditions must be satisfied: 3446 - we have not exceeded the number of levels requested 3447 - we can actually subassemble the active processes 3448 - we can find a suitable number of MPI processes where we can place the subassembled problem 3449 */ 3450 multilevel_allowed = PETSC_FALSE; 3451 multilevel_requested = PETSC_FALSE; 3452 if (pcbddc->current_level < pcbddc->max_levels) { 3453 multilevel_requested = PETSC_TRUE; 3454 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3455 multilevel_allowed = PETSC_FALSE; 3456 } else { 3457 multilevel_allowed = PETSC_TRUE; 3458 } 3459 } 3460 /* determine number of process partecipating to coarse solver */ 3461 if (multilevel_allowed) { 3462 ncoarse = ncoarse_ml; 3463 csin = csin_ml; 3464 } else { 3465 ncoarse = ncoarse_ds; 3466 csin = csin_ds; 3467 } 3468 3469 /* creates temporary l2gmap and IS for coarse indexes */ 3470 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3471 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3472 3473 /* creates temporary MATIS object for coarse matrix */ 3474 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3475 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3476 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3477 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3478 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3479 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3480 3481 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3482 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3483 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3484 const PetscInt *idxs; 3485 ISLocalToGlobalMapping tmap; 3486 3487 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3488 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3489 /* allocate space for temporary storage */ 3490 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3491 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3492 /* allocate for IS array */ 3493 nisdofs = pcbddc->n_ISForDofsLocal; 3494 nisneu = !!pcbddc->NeumannBoundariesLocal; 3495 nis = nisdofs + nisneu; 3496 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3497 /* dofs splitting */ 3498 for (i=0;i<nisdofs;i++) { 3499 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3500 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3501 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3502 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3503 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3504 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3506 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3507 } 3508 /* neumann boundaries */ 3509 if (pcbddc->NeumannBoundariesLocal) { 3510 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3511 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3512 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3513 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3514 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3515 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3516 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3517 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3518 } 3519 /* free memory */ 3520 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3521 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3522 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3523 } else { 3524 nis = 0; 3525 nisdofs = 0; 3526 nisneu = 0; 3527 isarray = NULL; 3528 } 3529 /* destroy no longer needed map */ 3530 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3531 3532 /* restrict on coarse candidates (if needed) */ 3533 coarse_mat_is = NULL; 3534 if (csin) { 3535 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3536 PetscInt j,tissize,*nisindices; 3537 PetscInt *coarse_candidates; 3538 const PetscInt* tisindices; 3539 /* get coarse candidates' ranks in pc communicator */ 3540 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3541 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3542 for (i=0,j=0;i<all_procs;i++) { 3543 if (!coarse_candidates[i]) { 3544 coarse_candidates[j]=i; 3545 j++; 3546 } 3547 } 3548 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3549 /* get a suitable subassembling pattern */ 3550 if (csin_type_simple) { 3551 PetscMPIInt rank; 3552 PetscInt issize,isidx; 3553 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3554 if (im_active) { 3555 issize = 1; 3556 isidx = (PetscInt)rank; 3557 } else { 3558 issize = 0; 3559 isidx = -1; 3560 } 3561 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3562 } else { 3563 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3564 } 3565 if (pcbddc->dbg_flag) { 3566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3568 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3569 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3570 for (i=0;i<j;i++) { 3571 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3572 } 3573 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3574 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3575 } 3576 /* shift the pattern on coarse candidates */ 3577 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3578 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3579 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3580 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3581 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3582 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3583 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3584 } 3585 if (pcbddc->dbg_flag) { 3586 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3588 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3590 } 3591 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3592 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3593 } else { 3594 if (pcbddc->dbg_flag) { 3595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3598 } 3599 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3600 coarse_mat_is = t_coarse_mat_is; 3601 } 3602 3603 /* create local to global scatters for coarse problem */ 3604 if (pcbddc->new_primal_space) { 3605 PetscInt lrows; 3606 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3607 if (coarse_mat_is) { 3608 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3609 } else { 3610 lrows = 0; 3611 } 3612 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3613 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3614 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3615 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3616 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3617 } 3618 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3619 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3620 3621 /* set defaults for coarse KSP and PC */ 3622 if (multilevel_allowed) { 3623 coarse_ksp_type = KSPRICHARDSON; 3624 coarse_pc_type = PCBDDC; 3625 } else { 3626 coarse_ksp_type = KSPPREONLY; 3627 coarse_pc_type = PCREDUNDANT; 3628 } 3629 3630 /* print some info if requested */ 3631 if (pcbddc->dbg_flag) { 3632 if (!multilevel_allowed) { 3633 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3634 if (multilevel_requested) { 3635 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3636 } else if (pcbddc->max_levels) { 3637 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3638 } 3639 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3640 } 3641 } 3642 3643 /* create the coarse KSP object only once with defaults */ 3644 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3645 issym = PETSC_FALSE; 3646 isherm = PETSC_FALSE; 3647 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3648 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3649 if (coarse_mat_is) { 3650 MatReuse coarse_mat_reuse; 3651 PetscViewer dbg_viewer; 3652 if (pcbddc->dbg_flag) { 3653 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3654 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3655 } 3656 if (!pcbddc->coarse_ksp) { 3657 char prefix[256],str_level[16]; 3658 size_t len; 3659 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3660 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3661 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3662 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3663 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3664 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3665 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3666 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3667 /* prefix */ 3668 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3669 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3670 if (!pcbddc->current_level) { 3671 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3672 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3673 } else { 3674 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3675 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3676 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3677 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3678 *(prefix+len)='\0'; 3679 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3680 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3681 } 3682 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3683 /* allow user customization */ 3684 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3685 } 3686 3687 /* get some info after set from options */ 3688 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3689 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3690 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3691 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3692 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3693 isbddc = PETSC_FALSE; 3694 } 3695 3696 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3697 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3698 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3699 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3700 if (nisdofs) { 3701 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3702 for (i=0;i<nisdofs;i++) { 3703 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3704 } 3705 } 3706 if (nisneu) { 3707 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3708 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3709 } 3710 3711 /* assemble coarse matrix */ 3712 if (coarse_reuse) { 3713 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3714 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3715 coarse_mat_reuse = MAT_REUSE_MATRIX; 3716 } else { 3717 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3718 } 3719 if (isbddc || isnn) { 3720 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3721 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3722 if (pcbddc->dbg_flag) { 3723 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3724 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3725 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3726 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3727 } 3728 } 3729 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3730 } else { 3731 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3732 } 3733 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3734 3735 /* propagate symmetry info to coarse matrix */ 3736 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3737 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3738 3739 /* set operators */ 3740 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3741 if (pcbddc->dbg_flag) { 3742 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3743 } 3744 } else { /* processes non partecipating to coarse solver (if any) */ 3745 coarse_mat = 0; 3746 } 3747 ierr = PetscFree(isarray);CHKERRQ(ierr); 3748 3749 /* temporary disabled code */ 3750 #if 0 3751 /* Compute coarse null space (special handling by BDDC only) */ 3752 if (pcbddc->NullSpace) { 3753 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3754 if (isbddc) { 3755 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3756 } else { 3757 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3758 } 3759 } 3760 #endif 3761 3762 if (pcbddc->coarse_ksp) { 3763 PetscBool ispreonly; 3764 /* setup coarse ksp */ 3765 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3766 /* hack */ 3767 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_rhs),&((pcbddc->coarse_ksp)->vec_sol));CHKERRQ(ierr); 3768 3769 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3770 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3771 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3772 KSP check_ksp; 3773 KSPType check_ksp_type; 3774 PC check_pc; 3775 Vec check_vec,coarse_vec; 3776 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3777 PetscInt its; 3778 PetscBool compute_eigs; 3779 PetscReal *eigs_r,*eigs_c; 3780 PetscInt neigs; 3781 3782 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3783 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3784 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3785 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3786 if (ispreonly) { 3787 check_ksp_type = KSPPREONLY; 3788 compute_eigs = PETSC_FALSE; 3789 } else { 3790 check_ksp_type = KSPGMRES; 3791 compute_eigs = PETSC_TRUE; 3792 } 3793 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3794 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3795 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3796 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3797 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3798 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3799 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3800 /* create random vec */ 3801 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3802 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3803 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3804 if (CoarseNullSpace) { 3805 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3806 } 3807 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3808 /* solve coarse problem */ 3809 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3810 if (CoarseNullSpace) { 3811 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3812 } 3813 /* set eigenvalue estimation if preonly has not been requested */ 3814 if (compute_eigs) { 3815 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3816 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3817 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3818 lambda_max = eigs_r[neigs-1]; 3819 lambda_min = eigs_r[0]; 3820 if (pcbddc->use_coarse_estimates) { 3821 if (lambda_max>lambda_min) { 3822 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3823 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3824 } 3825 } 3826 } 3827 3828 /* check coarse problem residual error */ 3829 if (pcbddc->dbg_flag) { 3830 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3831 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3832 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3833 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3834 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3835 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3836 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3837 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3838 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3839 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3840 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3841 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3842 if (compute_eigs) { 3843 PetscReal lambda_max_s,lambda_min_s; 3844 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3845 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3846 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3847 for (i=0;i<neigs;i++) { 3848 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3849 } 3850 } 3851 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3852 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3853 } 3854 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3855 if (compute_eigs) { 3856 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3857 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3858 } 3859 } 3860 } 3861 /* print additional info */ 3862 if (pcbddc->dbg_flag) { 3863 /* waits until all processes reaches this point */ 3864 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3865 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3866 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3867 } 3868 3869 /* free memory */ 3870 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3871 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3872 PetscFunctionReturn(0); 3873 } 3874 3875 #undef __FUNCT__ 3876 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3877 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3878 { 3879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3880 PC_IS* pcis = (PC_IS*)pc->data; 3881 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3882 PetscInt i,coarse_size; 3883 PetscInt *local_primal_indices; 3884 PetscErrorCode ierr; 3885 3886 PetscFunctionBegin; 3887 /* Compute global number of coarse dofs */ 3888 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3889 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3890 } 3891 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3892 3893 /* check numbering */ 3894 if (pcbddc->dbg_flag) { 3895 PetscScalar coarsesum,*array; 3896 3897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3898 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3899 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3900 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3901 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3902 for (i=0;i<pcbddc->local_primal_size;i++) { 3903 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3904 } 3905 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3906 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3907 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3908 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3909 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3910 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3911 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3912 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3913 for (i=0;i<pcis->n;i++) { 3914 if (array[i] == 1.0) { 3915 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3916 } 3917 } 3918 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3919 for (i=0;i<pcis->n;i++) { 3920 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3921 } 3922 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3923 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3924 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3925 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3926 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3927 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3928 if (pcbddc->dbg_flag > 1) { 3929 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3930 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3931 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3932 for (i=0;i<pcbddc->local_primal_size;i++) { 3933 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3934 } 3935 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3936 } 3937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3938 } 3939 /* get back data */ 3940 *coarse_size_n = coarse_size; 3941 *local_primal_indices_n = local_primal_indices; 3942 PetscFunctionReturn(0); 3943 } 3944 3945 #undef __FUNCT__ 3946 #define __FUNCT__ "PCBDDCGlobalToLocal" 3947 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3948 { 3949 PC_IS* pcis = (PC_IS*)pc->data; 3950 IS localis_t; 3951 PetscInt i,lsize,*idxs; 3952 PetscScalar *vals; 3953 PetscErrorCode ierr; 3954 3955 PetscFunctionBegin; 3956 /* get dirichlet indices in local ordering exploiting local to global map */ 3957 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3958 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3959 for (i=0;i<lsize;i++) vals[i] = 1.0; 3960 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3961 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3962 if (idxs) { /* multilevel guard */ 3963 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3964 } 3965 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3966 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3967 ierr = PetscFree(vals);CHKERRQ(ierr); 3968 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3969 /* now compute dirichlet set in local ordering */ 3970 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3971 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3972 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3973 for (i=0,lsize=0;i<pcis->n;i++) { 3974 if (vals[i] == 1.0) { 3975 lsize++; 3976 } 3977 } 3978 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3979 for (i=0,lsize=0;i<pcis->n;i++) { 3980 if (vals[i] == 1.0) { 3981 idxs[lsize++] = i; 3982 } 3983 } 3984 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3985 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3986 *localis = localis_t; 3987 PetscFunctionReturn(0); 3988 } 3989