1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscErrorCode ierr; 50 51 PetscFunctionBegin; 52 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 53 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 54 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 59 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 62 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 63 PetscFunctionReturn(0); 64 } 65 66 #undef __FUNCT__ 67 #define __FUNCT__ "PCBDDCResetTopography" 68 PetscErrorCode PCBDDCResetTopography(PC pc) 69 { 70 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 71 PetscErrorCode ierr; 72 73 PetscFunctionBegin; 74 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 75 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 76 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCResetSolvers" 82 PetscErrorCode PCBDDCResetSolvers(PC pc) 83 { 84 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 89 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 90 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 91 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 94 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 95 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 97 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 99 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 100 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 101 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 102 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 104 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 105 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 107 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 108 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 109 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 110 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 111 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_constraints,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 131 n_R = pcis->n-pcbddc->n_actual_vertices; 132 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 133 /* local work vectors (try to avoid unneeded work)*/ 134 /* R nodes */ 135 old_size = -1; 136 if (pcbddc->vec1_R) { 137 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 138 } 139 if (n_R != old_size) { 140 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 141 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 142 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 144 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 146 } 147 /* local primal dofs */ 148 old_size = -1; 149 if (pcbddc->vec1_P) { 150 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 151 } 152 if (pcbddc->local_primal_size != old_size) { 153 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 154 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 155 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 156 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 157 } 158 /* local explicit constraints */ 159 old_size = -1; 160 if (pcbddc->vec1_C) { 161 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 162 } 163 if (n_constraints && n_constraints != old_size) { 164 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 165 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 166 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 167 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCSetUpCorrection" 174 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 175 { 176 PetscErrorCode ierr; 177 /* pointers to pcis and pcbddc */ 178 PC_IS* pcis = (PC_IS*)pc->data; 179 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 180 /* submatrices of local problem */ 181 Mat A_RV,A_VR,A_VV; 182 /* working matrices */ 183 Mat M1,M2,M3,C_CR; 184 /* working vectors */ 185 Vec vec1_C,vec2_C,vec1_V,vec2_V; 186 /* additional working stuff */ 187 IS is_aux; 188 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 189 const PetscScalar *array,*row_cmat_values; 190 const PetscInt *row_cmat_indices,*idx_R_local; 191 PetscInt *idx_V_B,*auxindices; 192 PetscInt n_vertices,n_constraints,size_of_constraint; 193 PetscInt i,j,n_R,n_D,n_B; 194 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE,unsymmetric_check; 195 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 196 MatType impMatType; 197 /* some shortcuts to scalars */ 198 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 199 /* for debugging purposes */ 200 PetscReal *coarsefunctions_errors,*constraints_errors; 201 202 PetscFunctionBegin; 203 /* get number of vertices (corners plus constraints with change of basis) 204 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 205 n_vertices = pcbddc->n_actual_vertices; 206 n_constraints = pcbddc->local_primal_size-n_vertices; 207 /* Set Non-overlapping dimensions */ 208 n_B = pcis->n_B; n_D = pcis->n - n_B; 209 n_R = pcis->n-n_vertices; 210 211 /* Set types for local objects needed by BDDC precondtioner */ 212 impMatType = MATSEQDENSE; 213 214 /* Allocating some extra storage just to be safe */ 215 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 216 for (i=0;i<pcis->n;i++) auxindices[i]=i; 217 218 /* vertices in boundary numbering */ 219 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 220 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 221 if (i != n_vertices) { 222 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 223 } 224 225 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 226 if (n_constraints) { 227 /* see if we can save some allocations */ 228 if (pcbddc->local_auxmat2) { 229 PetscInt on_R,on_constraints; 230 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 231 if (on_R != n_R || on_constraints != n_constraints) { 232 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 233 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 234 } 235 } 236 /* work vectors */ 237 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 239 /* auxiliary matrices */ 240 if (!pcbddc->local_auxmat2) { 241 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 242 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 243 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 244 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 245 } 246 247 /* Extract constraints on R nodes: C_{CR} */ 248 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 249 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 250 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 251 252 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 253 for (i=0;i<n_constraints;i++) { 254 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 255 /* Get row of constraint matrix in R numbering */ 256 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 257 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 258 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 259 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 260 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 261 /* Solve for row of constraint matrix in R numbering */ 262 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 263 /* Set values in local_auxmat2 */ 264 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 265 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 267 } 268 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 269 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 271 272 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 273 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 274 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 275 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 276 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 277 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 278 ierr = MatSetUp(M1);CHKERRQ(ierr); 279 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 280 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 281 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 282 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 283 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 284 ierr = MatDestroy(&M2);CHKERRQ(ierr); 285 ierr = MatDestroy(&M3);CHKERRQ(ierr); 286 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 287 if (!pcbddc->local_auxmat1) { 288 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 289 } else { 290 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 291 } 292 } 293 294 /* Get submatrices from subdomain matrix */ 295 if (n_vertices) { 296 PetscInt ibs,mbs; 297 PetscBool issbaij; 298 Mat newmat; 299 300 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 301 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 302 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 303 if (ibs != mbs) { /* need to convert to SEQAIJ */ 304 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 305 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 308 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 309 } else { 310 /* this is safe */ 311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 312 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 313 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 314 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 315 /* which of the two approaches is faster? */ 316 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 317 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 318 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 319 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 320 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 321 } else { 322 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 323 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 324 } 325 } 326 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 327 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 328 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 329 } 330 331 /* Matrix of coarse basis functions (local) */ 332 if (pcbddc->coarse_phi_B) { 333 PetscInt on_B,on_primal; 334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 335 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 336 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 337 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 338 } 339 } 340 if (pcbddc->coarse_phi_D) { 341 PetscInt on_D,on_primal; 342 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 343 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 344 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 345 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 346 } 347 } 348 if (!pcbddc->coarse_phi_B) { 349 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 350 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 351 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 352 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 353 } 354 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 355 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 356 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 357 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 358 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 359 } 360 361 if (pcbddc->dbg_flag) { 362 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 363 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 364 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 365 } 366 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 367 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 368 369 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 370 371 /* vertices */ 372 for (i=0;i<n_vertices;i++) { 373 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 374 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 375 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 376 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 377 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 378 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 379 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 380 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 381 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 383 if (n_constraints) { 384 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 385 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 386 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 387 } 388 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 389 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 390 391 /* Set values in coarse basis function and subdomain part of coarse_mat */ 392 /* coarse basis functions */ 393 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 394 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 395 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 397 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 399 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 400 if (pcbddc->switch_static || pcbddc->dbg_flag) { 401 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 402 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 404 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 405 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 406 } 407 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 408 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 409 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 410 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 411 if (n_constraints) { 412 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 413 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 414 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 415 } 416 417 /* check */ 418 if (pcbddc->dbg_flag) { 419 /* assemble subdomain vector on local nodes */ 420 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 421 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 422 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 423 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 424 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 425 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 426 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 427 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 428 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 429 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 430 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 432 if (n_constraints) { 433 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 434 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 436 } 437 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 438 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 440 /* check saddle point solution */ 441 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 442 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 444 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 445 /* shift by the identity matrix */ 446 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 447 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 448 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 450 } 451 } 452 453 /* constraints */ 454 for (i=0;i<n_constraints;i++) { 455 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 456 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 457 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 458 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 459 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 460 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 461 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 462 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 463 if (n_vertices) { 464 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 465 } 466 /* Set values in coarse basis function and subdomain part of coarse_mat */ 467 /* coarse basis functions */ 468 j = i+n_vertices; /* don't touch this! */ 469 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 470 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 471 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 473 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 474 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 475 if (pcbddc->switch_static || pcbddc->dbg_flag) { 476 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 477 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 479 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 480 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 481 } 482 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 483 if (n_vertices) { 484 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 485 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 486 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 487 } 488 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 489 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 491 492 if (pcbddc->dbg_flag) { 493 /* assemble subdomain vector on nodes */ 494 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 495 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 496 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 498 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 499 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 500 /* assemble subdomain vector of lagrange multipliers */ 501 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 502 if (n_vertices) { 503 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 504 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 505 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 506 } 507 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 508 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 509 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 510 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 511 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 513 /* check saddle point solution */ 514 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 515 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 517 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 518 /* shift by the identity matrix */ 519 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 520 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 521 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 523 } 524 } 525 /* call assembling routines for local coarse basis */ 526 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 if (pcbddc->switch_static || pcbddc->dbg_flag) { 529 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 530 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 } 532 533 /* compute other basis functions for non-symmetric problems */ 534 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 535 if (!setsym || (setsym && !issym)) { 536 if (!pcbddc->coarse_psi_B) { 537 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 538 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 539 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 540 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 541 } 542 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 543 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 544 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 545 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 546 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 547 } 548 for (i=0;i<pcbddc->local_primal_size;i++) { 549 if (n_constraints) { 550 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 551 for (j=0;j<n_constraints;j++) { 552 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 553 } 554 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 555 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 556 } 557 if (i<n_vertices) { 558 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 559 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 560 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 561 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 562 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 563 if (n_constraints) { 564 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 565 } 566 } else { 567 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 568 } 569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 570 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 571 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 572 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 574 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 575 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 576 if (i<n_vertices) { 577 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 578 } 579 if (pcbddc->switch_static || pcbddc->dbg_flag) { 580 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 581 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 583 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 584 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 585 } 586 587 if (pcbddc->dbg_flag) { 588 /* assemble subdomain vector on nodes */ 589 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 590 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 591 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 592 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 593 if (i<n_vertices) { 594 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr); 595 } 596 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 597 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 598 /* assemble subdomain vector of lagrange multipliers */ 599 for (j=0;j<pcbddc->local_primal_size;j++) { 600 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 601 } 602 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 603 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 604 /* check saddle point solution */ 605 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 606 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 608 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 609 /* shift by the identity matrix */ 610 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 611 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 612 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 614 } 615 } 616 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 617 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 if (pcbddc->switch_static || pcbddc->dbg_flag) { 619 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 620 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 } 622 unsymmetric_check = PETSC_TRUE; 623 } else { /* take references to already computed coarse basis */ 624 unsymmetric_check = PETSC_FALSE; 625 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 626 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 627 if (pcbddc->coarse_phi_D) { 628 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 629 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 630 } 631 } 632 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 633 /* Checking coarse_sub_mat and coarse basis functios */ 634 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 635 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 636 if (pcbddc->dbg_flag) { 637 Mat coarse_sub_mat; 638 Mat AUXMAT,TM1,TM2,TM3,TM4; 639 Mat coarse_phi_D,coarse_phi_B; 640 Mat coarse_psi_D,coarse_psi_B; 641 Mat A_II,A_BB,A_IB,A_BI; 642 MatType checkmattype=MATSEQAIJ; 643 PetscReal real_value; 644 645 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 646 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 647 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 648 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 649 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 650 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 651 if (unsymmetric_check) { 652 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 653 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 654 } 655 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 656 657 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 660 if (unsymmetric_check) { 661 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 662 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 663 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 664 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 665 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 666 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 667 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 668 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 669 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 670 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 671 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 672 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 673 } else { 674 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 675 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 676 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 677 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 678 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 679 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 680 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 682 } 683 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 684 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 685 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 686 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 687 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 688 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 689 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 690 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 692 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 693 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 694 for (i=0;i<pcbddc->local_primal_size;i++) { 695 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 696 } 697 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 698 for (i=0;i<pcbddc->local_primal_size;i++) { 699 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 700 } 701 if (unsymmetric_check) { 702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 703 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 705 } 706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 707 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 709 } 710 } 711 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 712 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 713 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 714 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 715 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 716 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 717 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 718 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 719 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 720 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 721 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 722 if (unsymmetric_check) { 723 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 724 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 725 } 726 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 727 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 728 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 729 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 730 } 731 /* free memory */ 732 if (n_vertices) { 733 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 735 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 736 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 737 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 738 } 739 if (n_constraints) { 740 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 741 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 742 ierr = MatDestroy(&M1);CHKERRQ(ierr); 743 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 744 } 745 ierr = PetscFree(auxindices);CHKERRQ(ierr); 746 /* get back data */ 747 *coarse_submat_vals_n = coarse_submat_vals; 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 753 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 754 { 755 PC_IS* pcis = (PC_IS*)(pc->data); 756 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 757 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 758 PetscBool issbaij,isseqaij; 759 /* manage repeated solves */ 760 MatReuse reuse; 761 MatStructure matstruct; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 766 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 767 } 768 /* get mat flags */ 769 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 770 reuse = MAT_INITIAL_MATRIX; 771 if (pc->setupcalled) { 772 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 773 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 774 if (matstruct == SAME_NONZERO_PATTERN) { 775 reuse = MAT_REUSE_MATRIX; 776 } else { 777 reuse = MAT_INITIAL_MATRIX; 778 } 779 } 780 if (reuse == MAT_INITIAL_MATRIX) { 781 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 782 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 783 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 784 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 785 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 786 } 787 788 /* transform local matrices if needed */ 789 if (pcbddc->use_change_of_basis) { 790 Mat change_mat_all; 791 PetscScalar *row_cmat_values; 792 PetscInt *row_cmat_indices; 793 PetscInt *nnz,*is_indices,*temp_indices; 794 PetscInt i,j,k,n_D,n_B; 795 796 /* Get Non-overlapping dimensions */ 797 n_B = pcis->n_B; 798 n_D = pcis->n-n_B; 799 800 /* compute nonzero structure of change of basis on all local nodes */ 801 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 802 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 803 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 804 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 805 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 806 k=1; 807 for (i=0;i<n_B;i++) { 808 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 809 nnz[is_indices[i]]=j; 810 if (k < j) k = j; 811 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 812 } 813 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 /* assemble change of basis matrix on the whole set of local dofs */ 815 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 816 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 817 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 818 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 819 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 820 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 821 for (i=0;i<n_D;i++) { 822 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 823 } 824 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 825 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 826 for (i=0;i<n_B;i++) { 827 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 828 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 829 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 830 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 831 } 832 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 833 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 834 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 835 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isseqaij);CHKERRQ(ierr); 836 if (isseqaij) { 837 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 838 } else { 839 Mat work_mat; 840 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 841 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 842 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 843 } 844 /* 845 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 846 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 847 */ 848 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 849 ierr = PetscFree(nnz);CHKERRQ(ierr); 850 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 851 } else { 852 /* without change of basis, the local matrix is unchanged */ 853 if (!pcbddc->local_mat) { 854 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 855 pcbddc->local_mat = matis->A; 856 } 857 } 858 859 /* get submatrices */ 860 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 861 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 862 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 863 if (!issbaij) { 864 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 865 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 866 } else { 867 Mat newmat; 868 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 869 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 870 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 871 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 872 } 873 PetscFunctionReturn(0); 874 } 875 876 #undef __FUNCT__ 877 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 878 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 879 { 880 PC_IS* pcis = (PC_IS*)(pc->data); 881 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 882 IS is_aux1,is_aux2; 883 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 884 PetscInt n_vertices,i,j,n_R,n_D,n_B; 885 PetscInt vbs,bs; 886 PetscBT bitmask; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 /* 891 No need to setup local scatters if 892 - primal space is unchanged 893 AND 894 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 895 AND 896 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 897 */ 898 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 899 PetscFunctionReturn(0); 900 } 901 /* destroy old objects */ 902 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 903 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 904 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 905 /* Set Non-overlapping dimensions */ 906 n_B = pcis->n_B; n_D = pcis->n - n_B; 907 n_vertices = pcbddc->n_actual_vertices; 908 /* create auxiliary bitmask */ 909 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 910 for (i=0;i<n_vertices;i++) { 911 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 912 } 913 914 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 915 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 916 for (i=0, n_R=0; i<pcis->n; i++) { 917 if (!PetscBTLookup(bitmask,i)) { 918 idx_R_local[n_R] = i; 919 n_R++; 920 } 921 } 922 923 /* Block code */ 924 vbs = 1; 925 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 926 if (bs>1 && !(n_vertices%bs)) { 927 PetscBool is_blocked = PETSC_TRUE; 928 PetscInt *vary; 929 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 930 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 931 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 932 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 933 for (i=0; i<n_vertices; i++) { 934 if (vary[i]!=0 && vary[i]!=bs) { 935 is_blocked = PETSC_FALSE; 936 break; 937 } 938 } 939 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 940 vbs = bs; 941 for (i=0;i<n_R/vbs;i++) { 942 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 943 } 944 } 945 ierr = PetscFree(vary);CHKERRQ(ierr); 946 } 947 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 948 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 949 950 /* print some info if requested */ 951 if (pcbddc->dbg_flag) { 952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 954 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 955 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 956 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 957 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 958 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 959 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 960 } 961 962 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 963 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 964 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 965 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 966 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 967 for (i=0; i<n_D; i++) { 968 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 969 } 970 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 971 for (i=0, j=0; i<n_R; i++) { 972 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 973 aux_array1[j++] = i; 974 } 975 } 976 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 977 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 978 for (i=0, j=0; i<n_B; i++) { 979 if (!PetscBTLookup(bitmask,is_indices[i])) { 980 aux_array2[j++] = i; 981 } 982 } 983 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 984 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 985 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 986 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 987 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 988 989 if (pcbddc->switch_static || pcbddc->dbg_flag) { 990 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 991 for (i=0, j=0; i<n_R; i++) { 992 if (PetscBTLookup(bitmask,idx_R_local[i])) { 993 aux_array1[j++] = i; 994 } 995 } 996 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 997 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 998 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 999 } 1000 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1001 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1008 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 1009 { 1010 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1011 PC_IS *pcis = (PC_IS*)pc->data; 1012 PC pc_temp; 1013 Mat A_RR; 1014 MatStructure matstruct; 1015 MatReuse reuse; 1016 PetscScalar m_one = -1.0; 1017 PetscReal value; 1018 PetscInt n_D,n_R,ibs,mbs; 1019 PetscBool use_exact,use_exact_reduced,issbaij; 1020 PetscErrorCode ierr; 1021 /* prefixes stuff */ 1022 char dir_prefix[256],neu_prefix[256],str_level[16]; 1023 size_t len; 1024 1025 PetscFunctionBegin; 1026 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1027 1028 /* compute prefixes */ 1029 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1030 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1031 if (!pcbddc->current_level) { 1032 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1033 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1034 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1035 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1036 } else { 1037 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1038 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1039 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1040 len -= 15; /* remove "pc_bddc_coarse_" */ 1041 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1042 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1043 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1044 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1045 *(dir_prefix+len)='\0'; 1046 *(neu_prefix+len)='\0'; 1047 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1048 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1049 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1050 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1051 } 1052 1053 /* DIRICHLET PROBLEM */ 1054 /* Matrix for Dirichlet problem is pcis->A_II */ 1055 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1056 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1057 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1058 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1059 /* default */ 1060 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1061 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1062 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1063 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1064 if (issbaij) { 1065 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1066 } else { 1067 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1068 } 1069 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1070 /* Allow user's customization */ 1071 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1072 } 1073 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1074 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1075 if (!n_D) { 1076 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1077 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1078 } 1079 /* Set Up KSP for Dirichlet problem of BDDC */ 1080 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1081 /* set ksp_D into pcis data */ 1082 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1084 pcis->ksp_D = pcbddc->ksp_D; 1085 1086 /* NEUMANN PROBLEM */ 1087 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1088 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1089 if (pcbddc->ksp_R) { /* already created ksp */ 1090 PetscInt nn_R; 1091 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1092 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1093 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1094 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1095 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1096 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1097 reuse = MAT_INITIAL_MATRIX; 1098 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1099 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1100 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1101 reuse = MAT_INITIAL_MATRIX; 1102 } else { /* safe to reuse the matrix */ 1103 reuse = MAT_REUSE_MATRIX; 1104 } 1105 } 1106 /* last check */ 1107 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1108 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1109 reuse = MAT_INITIAL_MATRIX; 1110 } 1111 } else { /* first time, so we need to create the matrix */ 1112 reuse = MAT_INITIAL_MATRIX; 1113 } 1114 /* extract A_RR */ 1115 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1116 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1117 if (ibs != mbs) { 1118 Mat newmat; 1119 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1120 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1121 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1122 } else { 1123 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1124 } 1125 if (!pcbddc->ksp_R) { /* create object if not present */ 1126 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1127 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1128 /* default */ 1129 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1130 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1131 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1132 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1133 if (issbaij) { 1134 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1135 } else { 1136 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1137 } 1138 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1139 /* Allow user's customization */ 1140 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1141 } 1142 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1143 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1144 if (!n_R) { 1145 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1146 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1147 } 1148 /* Set Up KSP for Neumann problem of BDDC */ 1149 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1150 1151 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1152 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1153 /* Dirichlet */ 1154 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1155 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1156 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1157 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1158 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1159 /* need to be adapted? */ 1160 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1161 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1162 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1163 /* print info */ 1164 if (pcbddc->dbg_flag) { 1165 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1166 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1168 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1170 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1171 } 1172 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1173 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1174 } 1175 1176 /* Neumann */ 1177 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1178 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1179 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1180 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1181 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1182 /* need to be adapted? */ 1183 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1184 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1185 /* print info */ 1186 if (pcbddc->dbg_flag) { 1187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1188 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1189 } 1190 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1191 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1192 } 1193 } 1194 /* free Neumann problem's matrix */ 1195 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1201 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1202 { 1203 PetscErrorCode ierr; 1204 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1205 1206 PetscFunctionBegin; 1207 if (applytranspose) { 1208 if (pcbddc->local_auxmat1) { 1209 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1210 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1211 } 1212 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1213 } else { 1214 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1215 if (pcbddc->local_auxmat1) { 1216 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1217 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1218 } 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1226 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1227 { 1228 PetscErrorCode ierr; 1229 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1230 PC_IS* pcis = (PC_IS*) (pc->data); 1231 const PetscScalar zero = 0.0; 1232 1233 PetscFunctionBegin; 1234 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1235 if (applytranspose) { 1236 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1237 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1238 } else { 1239 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1240 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1241 } 1242 /* start communications from local primal nodes to rhs of coarse solver */ 1243 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1244 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 1247 /* Coarse solution -> rhs and sol updated in PCBDDCScattarCoarseDataBegin/End */ 1248 /* TODO remove null space when doing multilevel */ 1249 if (pcbddc->coarse_ksp) { 1250 if (applytranspose) { 1251 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1252 } else { 1253 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1254 } 1255 } 1256 /* start communications from coarse solver solution to local primal nodes */ 1257 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1258 1259 /* Local solution on R nodes */ 1260 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1261 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1262 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1263 if (pcbddc->switch_static) { 1264 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1265 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1266 } 1267 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1268 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1269 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1271 if (pcbddc->switch_static) { 1272 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 } 1275 1276 /* complete communications from coarse sol to local primal nodes */ 1277 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1278 1279 /* Sum contributions from two levels */ 1280 if (applytranspose) { 1281 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1282 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1283 } else { 1284 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1285 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1286 } 1287 PetscFunctionReturn(0); 1288 } 1289 1290 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */ 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1293 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1294 { 1295 PetscErrorCode ierr; 1296 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1297 PetscScalar *array,*array2; 1298 Vec from,to; 1299 1300 PetscFunctionBegin; 1301 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1302 from = pcbddc->coarse_vec; 1303 to = pcbddc->vec1_P; 1304 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1305 Vec tvec; 1306 PetscInt lsize; 1307 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1308 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1309 ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1310 ierr = VecGetArray(from,&array2);CHKERRQ(ierr); 1311 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1312 ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr); 1313 ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr); 1314 } 1315 } else { /* from local to global -> put data in coarse right hand side */ 1316 from = pcbddc->vec1_P; 1317 to = pcbddc->coarse_vec; 1318 } 1319 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1325 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1326 { 1327 PetscErrorCode ierr; 1328 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1329 PetscScalar *array,*array2; 1330 Vec from,to; 1331 1332 PetscFunctionBegin; 1333 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1334 from = pcbddc->coarse_vec; 1335 to = pcbddc->vec1_P; 1336 } else { /* from local to global -> put data in coarse right hand side */ 1337 from = pcbddc->vec1_P; 1338 to = pcbddc->coarse_vec; 1339 } 1340 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1341 if (smode == SCATTER_FORWARD) { 1342 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1343 Vec tvec; 1344 PetscInt lsize; 1345 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1346 ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr); 1347 ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1348 ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr); 1349 ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr); 1350 ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr); 1351 ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr); 1352 } 1353 } 1354 PetscFunctionReturn(0); 1355 } 1356 1357 /* uncomment for testing purposes */ 1358 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1361 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1362 { 1363 PetscErrorCode ierr; 1364 PC_IS* pcis = (PC_IS*)(pc->data); 1365 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1366 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1367 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1368 MatType impMatType=MATSEQAIJ; 1369 /* one and zero */ 1370 PetscScalar one=1.0,zero=0.0; 1371 /* space to store constraints and their local indices */ 1372 PetscScalar *temp_quadrature_constraint; 1373 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1374 /* iterators */ 1375 PetscInt i,j,k,total_counts,temp_start_ptr; 1376 /* stuff to store connected components stored in pcbddc->mat_graph */ 1377 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1378 PetscInt n_ISForFaces,n_ISForEdges; 1379 /* near null space stuff */ 1380 MatNullSpace nearnullsp; 1381 const Vec *nearnullvecs; 1382 Vec *localnearnullsp; 1383 PetscBool nnsp_has_cnst; 1384 PetscInt nnsp_size; 1385 PetscScalar *array; 1386 /* BLAS integers */ 1387 PetscBLASInt lwork,lierr; 1388 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1389 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1390 /* LAPACK working arrays for SVD or POD */ 1391 PetscBool skip_lapack; 1392 PetscScalar *work; 1393 PetscReal *singular_vals; 1394 #if defined(PETSC_USE_COMPLEX) 1395 PetscReal *rwork; 1396 #endif 1397 #if defined(PETSC_MISSING_LAPACK_GESVD) 1398 PetscBLASInt Blas_one_2=1; 1399 PetscScalar *temp_basis,*correlation_mat; 1400 #else 1401 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1402 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1403 #endif 1404 /* reuse */ 1405 PetscInt olocal_primal_size; 1406 PetscInt *oprimal_indices_local_idxs; 1407 /* change of basis */ 1408 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1409 PetscBool boolforchange,qr_needed; 1410 PetscBT touched,change_basis,qr_needed_idx; 1411 /* auxiliary stuff */ 1412 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1413 /* some quantities */ 1414 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1415 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1416 1417 1418 PetscFunctionBegin; 1419 /* Destroy Mat objects computed previously */ 1420 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1421 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1422 /* Get index sets for faces, edges and vertices from graph */ 1423 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1424 pcbddc->use_vertices = PETSC_TRUE; 1425 } 1426 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1427 /* HACK: provide functions to set change of basis */ 1428 if (!ISForVertices && pcbddc->NullSpace) { 1429 pcbddc->use_change_of_basis = PETSC_TRUE; 1430 pcbddc->use_change_on_faces = PETSC_FALSE; 1431 } 1432 /* print some info */ 1433 if (pcbddc->dbg_flag) { 1434 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1436 i = 0; 1437 if (ISForVertices) { 1438 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1439 } 1440 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1443 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1444 } 1445 /* check if near null space is attached to global mat */ 1446 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1447 if (nearnullsp) { 1448 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1449 /* remove any stored info */ 1450 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1451 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1452 /* store information for BDDC solver reuse */ 1453 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1454 pcbddc->onearnullspace = nearnullsp; 1455 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1456 for (i=0;i<nnsp_size;i++) { 1457 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1458 } 1459 } else { /* if near null space is not provided BDDC uses constants by default */ 1460 nnsp_size = 0; 1461 nnsp_has_cnst = PETSC_TRUE; 1462 } 1463 /* get max number of constraints on a single cc */ 1464 max_constraints = nnsp_size; 1465 if (nnsp_has_cnst) max_constraints++; 1466 1467 /* 1468 Evaluate maximum storage size needed by the procedure 1469 - temp_indices will contain start index of each constraint stored as follows 1470 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1471 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1472 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1473 */ 1474 total_counts = n_ISForFaces+n_ISForEdges; 1475 total_counts *= max_constraints; 1476 n_vertices = 0; 1477 if (ISForVertices) { 1478 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1479 } 1480 total_counts += n_vertices; 1481 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1482 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1483 total_counts = 0; 1484 max_size_of_constraint = 0; 1485 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1486 if (i<n_ISForEdges) { 1487 used_IS = &ISForEdges[i]; 1488 } else { 1489 used_IS = &ISForFaces[i-n_ISForEdges]; 1490 } 1491 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1492 total_counts += j; 1493 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1494 } 1495 total_counts *= max_constraints; 1496 total_counts += n_vertices; 1497 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1498 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1499 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1500 /* get local part of global near null space vectors */ 1501 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1502 for (k=0;k<nnsp_size;k++) { 1503 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1504 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1505 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1506 } 1507 1508 /* whether or not to skip lapack calls */ 1509 skip_lapack = PETSC_TRUE; 1510 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1511 1512 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1513 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1514 PetscScalar temp_work; 1515 #if defined(PETSC_MISSING_LAPACK_GESVD) 1516 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1517 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1518 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1519 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1520 #if defined(PETSC_USE_COMPLEX) 1521 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1522 #endif 1523 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1524 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1525 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1526 lwork = -1; 1527 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1528 #if !defined(PETSC_USE_COMPLEX) 1529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1530 #else 1531 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1532 #endif 1533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1534 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1535 #else /* on missing GESVD */ 1536 /* SVD */ 1537 PetscInt max_n,min_n; 1538 max_n = max_size_of_constraint; 1539 min_n = max_constraints; 1540 if (max_size_of_constraint < max_constraints) { 1541 min_n = max_size_of_constraint; 1542 max_n = max_constraints; 1543 } 1544 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1545 #if defined(PETSC_USE_COMPLEX) 1546 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1547 #endif 1548 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1549 lwork = -1; 1550 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1551 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1552 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1553 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1554 #if !defined(PETSC_USE_COMPLEX) 1555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1556 #else 1557 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1558 #endif 1559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1560 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1561 #endif /* on missing GESVD */ 1562 /* Allocate optimal workspace */ 1563 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1564 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1565 } 1566 /* Now we can loop on constraining sets */ 1567 total_counts = 0; 1568 temp_indices[0] = 0; 1569 /* vertices */ 1570 if (ISForVertices) { 1571 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1572 if (nnsp_has_cnst) { /* consider all vertices */ 1573 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1574 for (i=0;i<n_vertices;i++) { 1575 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1576 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1577 total_counts++; 1578 } 1579 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1580 PetscBool used_vertex; 1581 for (i=0;i<n_vertices;i++) { 1582 used_vertex = PETSC_FALSE; 1583 k = 0; 1584 while (!used_vertex && k<nnsp_size) { 1585 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1586 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1587 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1588 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1589 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1590 total_counts++; 1591 used_vertex = PETSC_TRUE; 1592 } 1593 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1594 k++; 1595 } 1596 } 1597 } 1598 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1599 n_vertices = total_counts; 1600 } 1601 1602 /* edges and faces */ 1603 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1604 if (i<n_ISForEdges) { 1605 used_IS = &ISForEdges[i]; 1606 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1607 } else { 1608 used_IS = &ISForFaces[i-n_ISForEdges]; 1609 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1610 } 1611 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1612 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1613 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1614 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1615 /* change of basis should not be performed on local periodic nodes */ 1616 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1617 if (nnsp_has_cnst) { 1618 PetscScalar quad_value; 1619 temp_constraints++; 1620 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1621 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1622 for (j=0;j<size_of_constraint;j++) { 1623 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1624 } 1625 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1626 total_counts++; 1627 } 1628 for (k=0;k<nnsp_size;k++) { 1629 PetscReal real_value; 1630 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1631 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1632 for (j=0;j<size_of_constraint;j++) { 1633 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1634 } 1635 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1636 /* check if array is null on the connected component */ 1637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1638 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1639 if (real_value > 0.0) { /* keep indices and values */ 1640 temp_constraints++; 1641 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1642 total_counts++; 1643 } 1644 } 1645 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1646 valid_constraints = temp_constraints; 1647 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1648 if (!pcbddc->use_nnsp_true && temp_constraints) { 1649 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1650 1651 #if defined(PETSC_MISSING_LAPACK_GESVD) 1652 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1653 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1654 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1655 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1656 from that computed using LAPACKgesvd 1657 -> This is due to a different computation of eigenvectors in LAPACKheev 1658 -> The quality of the POD-computed basis will be the same */ 1659 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1660 /* Store upper triangular part of correlation matrix */ 1661 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1662 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1663 for (j=0;j<temp_constraints;j++) { 1664 for (k=0;k<j+1;k++) { 1665 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1666 } 1667 } 1668 /* compute eigenvalues and eigenvectors of correlation matrix */ 1669 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1670 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1671 #if !defined(PETSC_USE_COMPLEX) 1672 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1673 #else 1674 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1675 #endif 1676 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1677 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1678 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1679 j=0; 1680 while (j < temp_constraints && singular_vals[j] < tol) j++; 1681 total_counts=total_counts-j; 1682 valid_constraints = temp_constraints-j; 1683 /* scale and copy POD basis into used quadrature memory */ 1684 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1685 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1686 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1687 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1688 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1690 if (j<temp_constraints) { 1691 PetscInt ii; 1692 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1693 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1694 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1695 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1696 for (k=0;k<temp_constraints-j;k++) { 1697 for (ii=0;ii<size_of_constraint;ii++) { 1698 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1699 } 1700 } 1701 } 1702 #else /* on missing GESVD */ 1703 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1704 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1705 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1706 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1707 #if !defined(PETSC_USE_COMPLEX) 1708 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1709 #else 1710 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1711 #endif 1712 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1714 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1715 k = temp_constraints; 1716 if (k > size_of_constraint) k = size_of_constraint; 1717 j = 0; 1718 while (j < k && singular_vals[k-j-1] < tol) j++; 1719 total_counts = total_counts-temp_constraints+k-j; 1720 valid_constraints = k-j; 1721 #endif /* on missing GESVD */ 1722 } 1723 /* setting change_of_basis flag is safe now */ 1724 if (boolforchange) { 1725 for (j=0;j<valid_constraints;j++) { 1726 PetscBTSet(change_basis,total_counts-j-1); 1727 } 1728 } 1729 } 1730 /* free index sets of faces, edges and vertices */ 1731 for (i=0;i<n_ISForFaces;i++) { 1732 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1733 } 1734 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1735 for (i=0;i<n_ISForEdges;i++) { 1736 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1737 } 1738 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1739 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1740 /* map temp_indices_to_constraint in boundary numbering */ 1741 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1742 if (i != temp_indices[total_counts]) { 1743 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1744 } 1745 1746 /* free workspace */ 1747 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1748 ierr = PetscFree(work);CHKERRQ(ierr); 1749 #if defined(PETSC_USE_COMPLEX) 1750 ierr = PetscFree(rwork);CHKERRQ(ierr); 1751 #endif 1752 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1753 #if defined(PETSC_MISSING_LAPACK_GESVD) 1754 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1755 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1756 #endif 1757 } 1758 for (k=0;k<nnsp_size;k++) { 1759 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1760 } 1761 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1762 1763 /* set quantities in pcbddc data structure and store previous primal size */ 1764 /* n_vertices defines the number of subdomain corners in the primal space */ 1765 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1766 olocal_primal_size = pcbddc->local_primal_size; 1767 pcbddc->local_primal_size = total_counts; 1768 pcbddc->n_vertices = n_vertices; 1769 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1770 1771 /* Create constraint matrix */ 1772 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1773 /* so we need to set it up properly either with or without change of basis */ 1774 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1775 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1776 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1777 /* array to compute a local numbering of constraints : vertices first then constraints */ 1778 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1779 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1780 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1781 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1782 /* auxiliary stuff for basis change */ 1783 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1784 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1785 1786 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1787 total_primal_vertices=0; 1788 for (i=0;i<pcbddc->local_primal_size;i++) { 1789 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1790 if (size_of_constraint == 1) { 1791 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1792 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1793 aux_primal_minloc[total_primal_vertices]=0; 1794 total_primal_vertices++; 1795 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1796 PetscInt min_loc,min_index; 1797 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1798 /* find first untouched local node */ 1799 k = 0; 1800 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1801 min_index = global_indices[k]; 1802 min_loc = k; 1803 /* search the minimum among global nodes already untouched on the cc */ 1804 for (k=1;k<size_of_constraint;k++) { 1805 /* there can be more than one constraint on a single connected component */ 1806 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1807 min_index = global_indices[k]; 1808 min_loc = k; 1809 } 1810 } 1811 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1812 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1813 aux_primal_minloc[total_primal_vertices]=min_loc; 1814 total_primal_vertices++; 1815 } 1816 } 1817 /* determine if a QR strategy is needed for change of basis */ 1818 qr_needed = PETSC_FALSE; 1819 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1820 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1821 if (PetscBTLookup(change_basis,i)) { 1822 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1823 j = 0; 1824 for (k=0;k<size_of_constraint;k++) { 1825 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1826 j++; 1827 } 1828 } 1829 /* found more than one primal dof on the cc */ 1830 if (j > 1) { 1831 PetscBTSet(qr_needed_idx,i); 1832 qr_needed = PETSC_TRUE; 1833 } 1834 } 1835 } 1836 /* free workspace */ 1837 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1838 1839 /* permute indices in order to have a sorted set of vertices */ 1840 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1841 1842 /* nonzero structure of constraint matrix */ 1843 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1844 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1845 j=total_primal_vertices; 1846 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1847 if (!PetscBTLookup(change_basis,i)) { 1848 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1849 j++; 1850 } 1851 } 1852 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1853 ierr = PetscFree(nnz);CHKERRQ(ierr); 1854 /* set values in constraint matrix */ 1855 for (i=0;i<total_primal_vertices;i++) { 1856 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1857 } 1858 total_counts = total_primal_vertices; 1859 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1860 if (!PetscBTLookup(change_basis,i)) { 1861 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1862 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1863 total_counts++; 1864 } 1865 } 1866 /* assembling */ 1867 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1868 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1869 /* 1870 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1871 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1872 */ 1873 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1874 if (pcbddc->use_change_of_basis) { 1875 /* dual and primal dofs on a single cc */ 1876 PetscInt dual_dofs,primal_dofs; 1877 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1878 PetscInt primal_counter; 1879 /* working stuff for GEQRF */ 1880 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1881 PetscBLASInt lqr_work; 1882 /* working stuff for UNGQR */ 1883 PetscScalar *gqr_work,lgqr_work_t; 1884 PetscBLASInt lgqr_work; 1885 /* working stuff for TRTRS */ 1886 PetscScalar *trs_rhs; 1887 PetscBLASInt Blas_NRHS; 1888 /* pointers for values insertion into change of basis matrix */ 1889 PetscInt *start_rows,*start_cols; 1890 PetscScalar *start_vals; 1891 /* working stuff for values insertion */ 1892 PetscBT is_primal; 1893 1894 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1895 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1896 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1897 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1898 /* work arrays */ 1899 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1900 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1901 /* nonzeros per row */ 1902 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1903 if (PetscBTLookup(change_basis,i)) { 1904 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1905 if (PetscBTLookup(qr_needed_idx,i)) { 1906 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1907 } else { 1908 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1909 /* get local primal index on the cc */ 1910 j = 0; 1911 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1912 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1913 } 1914 } 1915 } 1916 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1917 ierr = PetscFree(nnz);CHKERRQ(ierr); 1918 /* Set initial identity in the matrix */ 1919 for (i=0;i<pcis->n_B;i++) { 1920 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1921 } 1922 1923 if (pcbddc->dbg_flag) { 1924 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1926 } 1927 1928 1929 /* Now we loop on the constraints which need a change of basis */ 1930 /* 1931 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1932 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1933 1934 Basic blocks of change of basis matrix T computed 1935 1936 - Using the following block transformation if there is only a primal dof on the cc 1937 (in the example, primal dof is the last one of the edge in LOCAL ordering 1938 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1939 | 1 0 ... 0 1 | 1940 | 0 1 ... 0 1 | 1941 | ... | 1942 | 0 ... 1 1 | 1943 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1944 1945 - via QR decomposition of constraints otherwise 1946 */ 1947 if (qr_needed) { 1948 /* space to store Q */ 1949 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1950 /* first we issue queries for optimal work */ 1951 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1952 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1953 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1954 lqr_work = -1; 1955 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1956 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1957 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1958 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1959 lgqr_work = -1; 1960 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1962 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1963 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1964 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1965 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1967 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1968 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1969 /* array to store scaling factors for reflectors */ 1970 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1971 /* array to store rhs and solution of triangular solver */ 1972 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1973 /* allocating workspace for check */ 1974 if (pcbddc->dbg_flag) { 1975 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1976 } 1977 } 1978 /* array to store whether a node is primal or not */ 1979 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1980 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1981 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1982 if (i != total_primal_vertices) { 1983 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1984 } 1985 for (i=0;i<total_primal_vertices;i++) { 1986 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1987 } 1988 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1989 1990 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1991 /* -> using implicit ordering contained in temp_indices data */ 1992 total_counts = pcbddc->n_vertices; 1993 primal_counter = total_counts; 1994 while (total_counts<pcbddc->local_primal_size) { 1995 primal_dofs = 1; 1996 if (PetscBTLookup(change_basis,total_counts)) { 1997 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1998 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1999 primal_dofs++; 2000 } 2001 /* get constraint info */ 2002 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2003 dual_dofs = size_of_constraint-primal_dofs; 2004 2005 if (pcbddc->dbg_flag) { 2006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2007 } 2008 2009 if (primal_dofs > 1) { /* QR */ 2010 2011 /* copy quadrature constraints for change of basis check */ 2012 if (pcbddc->dbg_flag) { 2013 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2014 } 2015 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2016 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2017 2018 /* compute QR decomposition of constraints */ 2019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2020 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2021 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2022 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2023 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2024 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2025 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2026 2027 /* explictly compute R^-T */ 2028 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2029 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2030 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2031 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2032 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2033 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2034 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2035 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2036 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2037 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2038 2039 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2040 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2041 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2042 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2043 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2044 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2045 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2046 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2047 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2048 2049 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2050 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2051 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2053 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2054 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2056 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2059 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2060 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2061 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2062 2063 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2064 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 2065 /* insert cols for primal dofs */ 2066 for (j=0;j<primal_dofs;j++) { 2067 start_vals = &qr_basis[j*size_of_constraint]; 2068 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2069 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2070 } 2071 /* insert cols for dual dofs */ 2072 for (j=0,k=0;j<dual_dofs;k++) { 2073 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2074 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2075 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2076 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2077 j++; 2078 } 2079 } 2080 2081 /* check change of basis */ 2082 if (pcbddc->dbg_flag) { 2083 PetscInt ii,jj; 2084 PetscBool valid_qr=PETSC_TRUE; 2085 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2087 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2090 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2091 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2092 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2093 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2094 for (jj=0;jj<size_of_constraint;jj++) { 2095 for (ii=0;ii<primal_dofs;ii++) { 2096 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2097 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2098 } 2099 } 2100 if (!valid_qr) { 2101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2102 for (jj=0;jj<size_of_constraint;jj++) { 2103 for (ii=0;ii<primal_dofs;ii++) { 2104 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2105 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2106 } 2107 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2108 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2109 } 2110 } 2111 } 2112 } else { 2113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2114 } 2115 } 2116 } else { /* simple transformation block */ 2117 PetscInt row,col; 2118 PetscScalar val; 2119 for (j=0;j<size_of_constraint;j++) { 2120 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2121 if (!PetscBTLookup(is_primal,row)) { 2122 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2123 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2124 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2125 } else { 2126 for (k=0;k<size_of_constraint;k++) { 2127 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2128 if (row != col) { 2129 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2130 } else { 2131 val = 1.0; 2132 } 2133 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2134 } 2135 } 2136 } 2137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2138 } 2139 /* increment primal counter */ 2140 primal_counter += primal_dofs; 2141 } else { 2142 if (pcbddc->dbg_flag) { 2143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2144 } 2145 } 2146 /* increment constraint counter total_counts */ 2147 total_counts += primal_dofs; 2148 } 2149 2150 /* free workspace */ 2151 if (qr_needed) { 2152 if (pcbddc->dbg_flag) { 2153 ierr = PetscFree(work);CHKERRQ(ierr); 2154 } 2155 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2156 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2157 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2158 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2159 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2160 } 2161 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2162 /* assembling */ 2163 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 /* 2166 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2167 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2168 */ 2169 } 2170 2171 /* get indices in local ordering for vertices and constraints */ 2172 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2173 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2174 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2175 } 2176 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2177 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2178 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2179 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2180 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2181 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2182 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2183 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2184 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2185 /* set quantities in PCBDDC data struct */ 2186 pcbddc->n_actual_vertices = i; 2187 /* check if a new primal space has been introduced */ 2188 pcbddc->new_primal_space_local = PETSC_TRUE; 2189 if (olocal_primal_size == pcbddc->local_primal_size) { 2190 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2191 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2192 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2193 } 2194 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2195 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2196 2197 /* flush dbg viewer */ 2198 if (pcbddc->dbg_flag) { 2199 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2200 } 2201 2202 /* free workspace */ 2203 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2204 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2205 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2206 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2207 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2208 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2209 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2210 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 #undef __FUNCT__ 2215 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2216 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2217 { 2218 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2219 PC_IS *pcis = (PC_IS*)pc->data; 2220 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2221 PetscInt ierr,i,vertex_size; 2222 PetscViewer viewer=pcbddc->dbg_viewer; 2223 2224 PetscFunctionBegin; 2225 /* Reset previously computed graph */ 2226 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2227 /* Init local Graph struct */ 2228 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2229 2230 /* Check validity of the csr graph passed in by the user */ 2231 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2232 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2233 } 2234 2235 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2236 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2237 Mat mat_adj; 2238 const PetscInt *xadj,*adjncy; 2239 PetscBool flg_row=PETSC_TRUE; 2240 2241 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2242 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2243 if (!flg_row) { 2244 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2245 } 2246 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2247 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2248 if (!flg_row) { 2249 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2250 } 2251 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2252 } 2253 2254 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2255 vertex_size = 1; 2256 if (pcbddc->user_provided_isfordofs) { 2257 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2258 ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2259 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2260 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2261 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2262 } 2263 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2264 pcbddc->n_ISForDofs = 0; 2265 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2266 } 2267 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2268 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2269 } else { 2270 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2271 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2272 ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2273 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2274 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2275 } 2276 } 2277 } 2278 2279 /* Setup of Graph */ 2280 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2281 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2282 } 2283 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2284 ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2285 } 2286 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2287 2288 /* Graph's connected components analysis */ 2289 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2290 2291 /* print some info to stdout */ 2292 if (pcbddc->dbg_flag) { 2293 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2294 } 2295 2296 /* mark topography has done */ 2297 pcbddc->recompute_topography = PETSC_FALSE; 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2303 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2304 { 2305 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2306 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 n = 0; 2311 vertices = 0; 2312 if (pcbddc->ConstraintMatrix) { 2313 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2314 for (i=0;i<local_primal_size;i++) { 2315 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2316 if (size_of_constraint == 1) n++; 2317 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2318 } 2319 if (vertices_idx) { 2320 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2321 n = 0; 2322 for (i=0;i<local_primal_size;i++) { 2323 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2324 if (size_of_constraint == 1) { 2325 vertices[n++]=row_cmat_indices[0]; 2326 } 2327 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2328 } 2329 } 2330 } 2331 *n_vertices = n; 2332 if (vertices_idx) *vertices_idx = vertices; 2333 PetscFunctionReturn(0); 2334 } 2335 2336 #undef __FUNCT__ 2337 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2338 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2339 { 2340 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2341 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2342 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2343 PetscBT touched; 2344 PetscErrorCode ierr; 2345 2346 /* This function assumes that the number of local constraints per connected component 2347 is not greater than the number of nodes defined for the connected component 2348 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2349 PetscFunctionBegin; 2350 n = 0; 2351 constraints_index = 0; 2352 if (pcbddc->ConstraintMatrix) { 2353 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2354 max_size_of_constraint = 0; 2355 for (i=0;i<local_primal_size;i++) { 2356 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2357 if (size_of_constraint > 1) { 2358 n++; 2359 } 2360 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2361 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2362 } 2363 if (constraints_idx) { 2364 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2365 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2366 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2367 n = 0; 2368 for (i=0;i<local_primal_size;i++) { 2369 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2370 if (size_of_constraint > 1) { 2371 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2372 /* find first untouched local node */ 2373 j = 0; 2374 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2375 min_index = row_cmat_global_indices[j]; 2376 min_loc = j; 2377 /* search the minimum among nodes not yet touched on the connected component 2378 since there can be more than one constraint on a single cc */ 2379 for (j=1;j<size_of_constraint;j++) { 2380 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2381 min_index = row_cmat_global_indices[j]; 2382 min_loc = j; 2383 } 2384 } 2385 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2386 constraints_index[n++] = row_cmat_indices[min_loc]; 2387 } 2388 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2389 } 2390 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2391 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2392 } 2393 } 2394 *n_constraints = n; 2395 if (constraints_idx) *constraints_idx = constraints_index; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 /* the next two functions has been adapted from pcis.c */ 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "PCBDDCApplySchur" 2402 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2403 { 2404 PetscErrorCode ierr; 2405 PC_IS *pcis = (PC_IS*)(pc->data); 2406 2407 PetscFunctionBegin; 2408 if (!vec2_B) { vec2_B = v; } 2409 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2410 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2411 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2412 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2413 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 #undef __FUNCT__ 2418 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2419 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2420 { 2421 PetscErrorCode ierr; 2422 PC_IS *pcis = (PC_IS*)(pc->data); 2423 2424 PetscFunctionBegin; 2425 if (!vec2_B) { vec2_B = v; } 2426 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2427 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2428 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2429 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2430 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "PCBDDCSubsetNumbering" 2436 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2437 { 2438 Vec local_vec,global_vec; 2439 IS seqis,paris; 2440 VecScatter scatter_ctx; 2441 PetscScalar *array; 2442 PetscInt *temp_global_dofs; 2443 PetscScalar globalsum; 2444 PetscInt i,j,s; 2445 PetscInt nlocals,first_index,old_index,max_local; 2446 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2447 PetscMPIInt *dof_sizes,*dof_displs; 2448 PetscBool first_found; 2449 PetscErrorCode ierr; 2450 2451 PetscFunctionBegin; 2452 /* mpi buffers */ 2453 MPI_Comm_size(comm,&size_prec_comm); 2454 MPI_Comm_rank(comm,&rank_prec_comm); 2455 j = ( !rank_prec_comm ? size_prec_comm : 0); 2456 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2457 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2458 /* get maximum size of subset */ 2459 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2460 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2461 max_local = 0; 2462 if (n_local_dofs) { 2463 max_local = temp_global_dofs[0]; 2464 for (i=1;i<n_local_dofs;i++) { 2465 if (max_local < temp_global_dofs[i] ) { 2466 max_local = temp_global_dofs[i]; 2467 } 2468 } 2469 } 2470 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2471 max_global++; 2472 max_local = 0; 2473 if (n_local_dofs) { 2474 max_local = local_dofs[0]; 2475 for (i=1;i<n_local_dofs;i++) { 2476 if (max_local < local_dofs[i] ) { 2477 max_local = local_dofs[i]; 2478 } 2479 } 2480 } 2481 max_local++; 2482 /* allocate workspace */ 2483 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2484 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2485 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2486 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2487 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2488 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2489 /* create scatter */ 2490 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2491 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2492 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2493 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2494 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2495 /* init array */ 2496 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2497 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2498 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2499 if (local_dofs_mult) { 2500 for (i=0;i<n_local_dofs;i++) { 2501 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2502 } 2503 } else { 2504 for (i=0;i<n_local_dofs;i++) { 2505 array[local_dofs[i]]=1.0; 2506 } 2507 } 2508 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2509 /* scatter into global vec and get total number of global dofs */ 2510 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2512 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2513 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2514 /* Fill global_vec with cumulative function for global numbering */ 2515 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2516 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2517 nlocals = 0; 2518 first_index = -1; 2519 first_found = PETSC_FALSE; 2520 for (i=0;i<s;i++) { 2521 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2522 first_found = PETSC_TRUE; 2523 first_index = i; 2524 } 2525 nlocals += (PetscInt)PetscRealPart(array[i]); 2526 } 2527 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2528 if (!rank_prec_comm) { 2529 dof_displs[0]=0; 2530 for (i=1;i<size_prec_comm;i++) { 2531 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2532 } 2533 } 2534 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2535 if (first_found) { 2536 array[first_index] += (PetscScalar)nlocals; 2537 old_index = first_index; 2538 for (i=first_index+1;i<s;i++) { 2539 if (PetscRealPart(array[i]) > 0.0) { 2540 array[i] += array[old_index]; 2541 old_index = i; 2542 } 2543 } 2544 } 2545 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2546 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2547 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2548 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2549 /* get global ordering of local dofs */ 2550 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2551 if (local_dofs_mult) { 2552 for (i=0;i<n_local_dofs;i++) { 2553 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2554 } 2555 } else { 2556 for (i=0;i<n_local_dofs;i++) { 2557 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2558 } 2559 } 2560 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2561 /* free workspace */ 2562 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2563 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2564 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2565 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2566 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2567 /* return pointer to global ordering of local dofs */ 2568 *global_numbering_subset = temp_global_dofs; 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2574 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2575 { 2576 PetscInt i,j; 2577 PetscScalar *alphas; 2578 PetscErrorCode ierr; 2579 2580 PetscFunctionBegin; 2581 /* this implements stabilized Gram-Schmidt */ 2582 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2583 for (i=0;i<n;i++) { 2584 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2585 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2586 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2587 } 2588 ierr = PetscFree(alphas);CHKERRQ(ierr); 2589 PetscFunctionReturn(0); 2590 } 2591 2592 /* TODO 2593 - now preallocation is done assuming SEQDENSE local matrices 2594 */ 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatISGetMPIXAIJ" 2597 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2598 { 2599 Mat new_mat; 2600 Mat_IS *matis = (Mat_IS*)(mat->data); 2601 /* info on mat */ 2602 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2603 PetscInt bs,rows,cols; 2604 PetscInt lrows,lcols; 2605 PetscInt local_rows,local_cols; 2606 PetscBool isdense; 2607 /* values insertion */ 2608 PetscScalar *array; 2609 PetscInt *local_indices,*global_indices; 2610 /* work */ 2611 PetscInt i,j,index_row; 2612 PetscErrorCode ierr; 2613 2614 PetscFunctionBegin; 2615 /* MISSING CHECKS 2616 - rectangular case not covered (it is not allowed by MATIS) 2617 */ 2618 /* get info from mat */ 2619 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2620 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2621 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2622 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2623 2624 /* work */ 2625 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2626 for (i=0;i<local_rows;i++) local_indices[i]=i; 2627 /* map indices of local mat to global values */ 2628 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2629 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2630 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2631 2632 if (reuse==MAT_INITIAL_MATRIX) { 2633 Vec vec_dnz,vec_onz; 2634 PetscScalar *my_dnz,*my_onz; 2635 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2636 PetscInt index_col,owner; 2637 PetscMPIInt nsubdomains; 2638 2639 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2640 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2641 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2642 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2643 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2644 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2645 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2646 2647 /* 2648 preallocation 2649 */ 2650 2651 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2652 /* 2653 Some vectors are needed to sum up properly on shared interface dofs. 2654 Preallocation macros cannot do the job. 2655 Note that preallocation is not exact, since it overestimates nonzeros 2656 */ 2657 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2658 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2659 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2660 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2661 /* All processes need to compute entire row ownership */ 2662 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2663 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2664 for (i=0;i<nsubdomains;i++) { 2665 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2666 row_ownership[j]=i; 2667 } 2668 } 2669 2670 /* 2671 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2672 then, they will be summed up properly. This way, preallocation is always sufficient 2673 */ 2674 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2675 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2676 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2677 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2678 for (i=0;i<local_rows;i++) { 2679 index_row = global_indices[i]; 2680 for (j=i;j<local_rows;j++) { 2681 owner = row_ownership[index_row]; 2682 index_col = global_indices[j]; 2683 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2684 my_dnz[i] += 1.0; 2685 } else { /* offdiag block */ 2686 my_onz[i] += 1.0; 2687 } 2688 /* same as before, interchanging rows and cols */ 2689 if (i != j) { 2690 owner = row_ownership[index_col]; 2691 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2692 my_dnz[j] += 1.0; 2693 } else { 2694 my_onz[j] += 1.0; 2695 } 2696 } 2697 } 2698 } 2699 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2700 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2701 if (local_rows) { /* multilevel guard */ 2702 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2703 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2704 } 2705 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2706 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2707 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2708 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2709 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2710 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2711 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2712 2713 /* set computed preallocation in dnz and onz */ 2714 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2715 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2716 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2717 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2718 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2719 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2720 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2721 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2722 2723 /* Resize preallocation if overestimated */ 2724 for (i=0;i<lrows;i++) { 2725 dnz[i] = PetscMin(dnz[i],lcols); 2726 onz[i] = PetscMin(onz[i],cols-lcols); 2727 } 2728 /* set preallocation */ 2729 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2730 for (i=0;i<lrows/bs;i++) { 2731 dnz[i] = dnz[i*bs]/bs; 2732 onz[i] = onz[i*bs]/bs; 2733 } 2734 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2735 for (i=0;i<lrows/bs;i++) { 2736 dnz[i] = dnz[i]-i; 2737 } 2738 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2739 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2740 *M = new_mat; 2741 } else { 2742 PetscInt mbs,mrows,mcols; 2743 /* some checks */ 2744 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2745 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2746 if (mrows != rows) { 2747 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2748 } 2749 if (mrows != rows) { 2750 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2751 } 2752 if (mbs != bs) { 2753 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2754 } 2755 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2756 } 2757 /* set local to global mappings */ 2758 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2759 /* Set values */ 2760 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2761 if (isdense) { /* special case for dense local matrices */ 2762 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2763 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2764 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2765 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2766 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2767 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2768 } else { /* very basic values insertion for all other matrix types */ 2769 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2770 for (i=0;i<local_rows;i++) { 2771 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2772 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2773 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2774 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2775 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2776 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2777 } 2778 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2779 } 2780 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2781 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2782 if (isdense) { 2783 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2784 } 2785 PetscFunctionReturn(0); 2786 } 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatISGetSubassemblingPattern" 2790 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 2791 { 2792 Mat subdomain_adj; 2793 IS new_ranks,ranks_send_to; 2794 MatPartitioning partitioner; 2795 Mat_IS *matis; 2796 PetscInt n_neighs,*neighs,*n_shared,**shared; 2797 PetscInt prank; 2798 PetscMPIInt size,rank,color; 2799 PetscInt *xadj,*adjncy,*oldranks; 2800 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2801 PetscInt i,j,local_size,threshold=0; 2802 PetscErrorCode ierr; 2803 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2804 PetscSubcomm subcomm; 2805 2806 PetscFunctionBegin; 2807 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2808 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2809 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2810 2811 /* Get info on mapping */ 2812 matis = (Mat_IS*)(mat->data); 2813 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2815 2816 /* build local CSR graph of subdomains' connectivity */ 2817 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2818 xadj[0] = 0; 2819 xadj[1] = PetscMax(n_neighs-1,0); 2820 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2821 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2822 2823 if (threshold) { 2824 PetscInt* count,min_threshold; 2825 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2826 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2827 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2828 for (j=0;j<n_shared[i];j++) { 2829 count[shared[i][j]] += 1; 2830 } 2831 } 2832 /* adapt threshold since we dont want to lose significant connections */ 2833 min_threshold = n_neighs; 2834 for (i=1;i<n_neighs;i++) { 2835 for (j=0;j<n_shared[i];j++) { 2836 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2837 } 2838 } 2839 threshold = PetscMax(min_threshold+1,threshold); 2840 xadj[1] = 0; 2841 for (i=1;i<n_neighs;i++) { 2842 for (j=0;j<n_shared[i];j++) { 2843 if (count[shared[i][j]] < threshold) { 2844 adjncy[xadj[1]] = neighs[i]; 2845 adjncy_wgt[xadj[1]] = n_shared[i]; 2846 xadj[1]++; 2847 break; 2848 } 2849 } 2850 } 2851 ierr = PetscFree(count);CHKERRQ(ierr); 2852 } else { 2853 if (xadj[1]) { 2854 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2855 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2856 } 2857 } 2858 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2859 if (use_square) { 2860 for (i=0;i<xadj[1];i++) { 2861 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2862 } 2863 } 2864 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2865 2866 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2867 2868 /* 2869 Restrict work on active processes only. 2870 */ 2871 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2872 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2873 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2874 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2875 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2876 if (color) { 2877 ierr = PetscFree(xadj);CHKERRQ(ierr); 2878 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2879 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2880 } else { 2881 PetscInt coarsening_ratio; 2882 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2883 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2884 prank = rank; 2885 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2886 /* 2887 for (i=0;i<size;i++) { 2888 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2889 } 2890 */ 2891 for (i=0;i<xadj[1];i++) { 2892 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2893 } 2894 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2895 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2896 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 2897 2898 /* Partition */ 2899 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2900 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2901 if (use_vwgt) { 2902 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2903 v_wgt[0] = local_size; 2904 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2905 } 2906 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 2907 coarsening_ratio = size/n_subdomains; 2908 /* Parmetis does not always give back nparts with small graphs! this should be taken into account */ 2909 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 2910 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2911 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2912 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 2913 2914 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2915 if (contiguous) { 2916 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 2917 } else { 2918 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 2919 } 2920 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2921 /* clean up */ 2922 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2923 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2924 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2925 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2926 } 2927 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2928 2929 /* assemble parallel IS for sends */ 2930 i = 1; 2931 if (color) i=0; 2932 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2933 2934 /* get back IS */ 2935 *is_sends = ranks_send_to; 2936 PetscFunctionReturn(0); 2937 } 2938 2939 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2940 2941 #undef __FUNCT__ 2942 #define __FUNCT__ "MatISSubassemble" 2943 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 2944 { 2945 Mat local_mat; 2946 Mat_IS *matis; 2947 IS is_sends_internal; 2948 PetscInt rows,cols; 2949 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 2950 PetscBool ismatis,isdense,destroy_mat; 2951 ISLocalToGlobalMapping l2gmap; 2952 PetscInt* l2gmap_indices; 2953 const PetscInt* is_indices; 2954 MatType new_local_type; 2955 /* buffers */ 2956 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2957 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 2958 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2959 /* MPI */ 2960 MPI_Comm comm,comm_n; 2961 PetscSubcomm subcomm; 2962 PetscMPIInt n_sends,n_recvs,commsize; 2963 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 2964 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 2965 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 2966 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 2967 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 2968 PetscErrorCode ierr; 2969 2970 PetscFunctionBegin; 2971 /* TODO: add missing checks */ 2972 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 2973 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 2974 PetscValidLogicalCollectiveEnum(mat,reuse,5); 2975 PetscValidLogicalCollectiveInt(mat,nis,7); 2976 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2977 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 2978 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2979 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2980 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2981 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2982 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2983 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 2984 PetscInt mrows,mcols,mnrows,mncols; 2985 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 2986 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 2987 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 2988 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 2989 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 2990 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 2991 } 2992 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2993 PetscValidLogicalCollectiveInt(mat,bs,0); 2994 /* prepare IS for sending if not provided */ 2995 if (!is_sends) { 2996 PetscBool pcontig = PETSC_TRUE; 2997 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 2998 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 2999 } else { 3000 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3001 is_sends_internal = is_sends; 3002 } 3003 3004 /* get pointer of MATIS data */ 3005 matis = (Mat_IS*)mat->data; 3006 3007 /* get comm */ 3008 comm = PetscObjectComm((PetscObject)mat); 3009 3010 /* compute number of sends */ 3011 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3012 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3013 3014 /* compute number of receives */ 3015 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3016 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 3017 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3018 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3019 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3020 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3021 ierr = PetscFree(iflags);CHKERRQ(ierr); 3022 3023 /* restrict comm if requested */ 3024 subcomm = 0; 3025 destroy_mat = PETSC_FALSE; 3026 if (restrict_comm) { 3027 PetscMPIInt color,rank,subcommsize; 3028 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3029 color = 0; 3030 if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */ 3031 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3032 subcommsize = commsize - subcommsize; 3033 /* check if reuse has been requested */ 3034 if (reuse == MAT_REUSE_MATRIX) { 3035 if (*mat_n) { 3036 PetscMPIInt subcommsize2; 3037 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3038 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3039 comm_n = PetscObjectComm((PetscObject)*mat_n); 3040 } else { 3041 comm_n = PETSC_COMM_SELF; 3042 } 3043 } else { /* MAT_INITIAL_MATRIX */ 3044 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3045 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3046 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3047 comm_n = subcomm->comm; 3048 } 3049 /* flag to destroy *mat_n if not significative */ 3050 if (color) destroy_mat = PETSC_TRUE; 3051 } else { 3052 comm_n = comm; 3053 } 3054 3055 /* prepare send/receive buffers */ 3056 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 3057 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3058 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 3059 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3060 if (nis) { 3061 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs_is),&ilengths_idxs_is);CHKERRQ(ierr); 3062 ierr = PetscMemzero(ilengths_idxs_is,commsize*sizeof(*ilengths_idxs_is));CHKERRQ(ierr); 3063 } 3064 3065 /* Get data from local matrices */ 3066 if (!isdense) { 3067 /* TODO: See below some guidelines on how to prepare the local buffers */ 3068 /* 3069 send_buffer_vals should contain the raw values of the local matrix 3070 send_buffer_idxs should contain: 3071 - MatType_PRIVATE type 3072 - PetscInt size_of_l2gmap 3073 - PetscInt global_row_indices[size_of_l2gmap] 3074 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3075 */ 3076 } else { 3077 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3078 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3079 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 3080 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3081 send_buffer_idxs[1] = i; 3082 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3083 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3084 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3085 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3086 for (i=0;i<n_sends;i++) { 3087 ilengths_vals[is_indices[i]] = len*len; 3088 ilengths_idxs[is_indices[i]] = len+2; 3089 } 3090 } 3091 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3092 /* additional is (if any) */ 3093 if (nis) { 3094 PetscMPIInt psum; 3095 PetscInt j; 3096 for (j=0,psum=0;j<nis;j++) { 3097 PetscInt plen; 3098 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3099 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3100 psum += len+1; /* indices + lenght */ 3101 } 3102 ierr = PetscMalloc(psum*sizeof(PetscInt),&send_buffer_idxs_is);CHKERRQ(ierr); 3103 for (j=0,psum=0;j<nis;j++) { 3104 PetscInt plen; 3105 const PetscInt *is_array_idxs; 3106 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3107 send_buffer_idxs_is[psum] = plen; 3108 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3109 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3110 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3111 psum += plen+1; /* indices + lenght */ 3112 } 3113 for (i=0;i<n_sends;i++) { 3114 ilengths_idxs_is[is_indices[i]] = psum; 3115 } 3116 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3117 } 3118 3119 buf_size_idxs = 0; 3120 buf_size_vals = 0; 3121 buf_size_idxs_is = 0; 3122 for (i=0;i<n_recvs;i++) { 3123 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3124 buf_size_vals += (PetscInt)olengths_vals[i]; 3125 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3126 } 3127 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 3128 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 3129 ierr = PetscMalloc(buf_size_idxs_is*sizeof(PetscInt),&recv_buffer_idxs_is);CHKERRQ(ierr); 3130 3131 /* get new tags for clean communications */ 3132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3133 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3134 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3135 3136 /* allocate for requests */ 3137 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 3138 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 3139 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs_is);CHKERRQ(ierr); 3140 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 3141 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 3142 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs_is);CHKERRQ(ierr); 3143 3144 /* communications */ 3145 ptr_idxs = recv_buffer_idxs; 3146 ptr_vals = recv_buffer_vals; 3147 ptr_idxs_is = recv_buffer_idxs_is; 3148 for (i=0;i<n_recvs;i++) { 3149 source_dest = onodes[i]; 3150 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3151 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3152 ptr_idxs += olengths_idxs[i]; 3153 ptr_vals += olengths_vals[i]; 3154 if (nis) { 3155 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3156 ptr_idxs_is += olengths_idxs_is[i]; 3157 } 3158 } 3159 for (i=0;i<n_sends;i++) { 3160 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3161 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3162 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3163 if (nis) { 3164 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3165 } 3166 } 3167 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3168 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3169 3170 /* assemble new l2g map */ 3171 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3172 ptr_idxs = recv_buffer_idxs; 3173 buf_size_idxs = 0; 3174 for (i=0;i<n_recvs;i++) { 3175 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3176 ptr_idxs += olengths_idxs[i]; 3177 } 3178 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 3179 ptr_idxs = recv_buffer_idxs; 3180 buf_size_idxs = 0; 3181 for (i=0;i<n_recvs;i++) { 3182 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3183 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3184 ptr_idxs += olengths_idxs[i]; 3185 } 3186 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 3187 ierr = ISLocalToGlobalMappingCreate(comm_n,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3188 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3189 3190 /* infer new local matrix type from received local matrices type */ 3191 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3192 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3193 if (n_recvs) { 3194 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3195 ptr_idxs = recv_buffer_idxs; 3196 for (i=0;i<n_recvs;i++) { 3197 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3198 new_local_type_private = MATAIJ_PRIVATE; 3199 break; 3200 } 3201 ptr_idxs += olengths_idxs[i]; 3202 } 3203 switch (new_local_type_private) { 3204 case MATDENSE_PRIVATE: 3205 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3206 new_local_type = MATSEQAIJ; 3207 bs = 1; 3208 } else { /* if I receive only 1 dense matrix */ 3209 new_local_type = MATSEQDENSE; 3210 bs = 1; 3211 } 3212 break; 3213 case MATAIJ_PRIVATE: 3214 new_local_type = MATSEQAIJ; 3215 bs = 1; 3216 break; 3217 case MATBAIJ_PRIVATE: 3218 new_local_type = MATSEQBAIJ; 3219 break; 3220 case MATSBAIJ_PRIVATE: 3221 new_local_type = MATSEQSBAIJ; 3222 break; 3223 default: 3224 SETERRQ2(comm,PETSC_ERR_PLIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3225 break; 3226 } 3227 } else { /* by default, new_local_type is seqdense */ 3228 new_local_type = MATSEQDENSE; 3229 bs = 1; 3230 } 3231 3232 /* create MATIS object if needed */ 3233 if (reuse == MAT_INITIAL_MATRIX) { 3234 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3235 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3236 } else { 3237 /* it also destroys the local matrices */ 3238 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3239 } 3240 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3241 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3242 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3243 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3244 3245 /* set values */ 3246 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3247 ptr_vals = recv_buffer_vals; 3248 ptr_idxs = recv_buffer_idxs; 3249 for (i=0;i<n_recvs;i++) { 3250 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3251 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3252 ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3253 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3254 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3255 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3256 } else { 3257 /* TODO */ 3258 } 3259 ptr_idxs += olengths_idxs[i]; 3260 ptr_vals += olengths_vals[i]; 3261 } 3262 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3263 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3266 3267 #if 1 3268 if (!restrict_comm) { /* check */ 3269 Vec lvec,rvec; 3270 PetscReal infty_error; 3271 3272 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3273 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3274 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3275 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3276 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3277 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3278 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3279 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3280 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3281 } 3282 #endif 3283 3284 /* assemble new additional is (if any) */ 3285 if (nis) { 3286 PetscInt **temp_idxs,*count_is,j,psum; 3287 3288 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3289 ierr = PetscMalloc(nis*sizeof(PetscInt),&count_is);CHKERRQ(ierr); 3290 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3291 ptr_idxs = recv_buffer_idxs_is; 3292 psum = 0; 3293 for (i=0;i<n_recvs;i++) { 3294 for (j=0;j<nis;j++) { 3295 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3296 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3297 psum += plen; 3298 ptr_idxs += plen+1; /* shift pointer to received data */ 3299 } 3300 } 3301 ierr = PetscMalloc(nis*sizeof(PetscInt*),&temp_idxs);CHKERRQ(ierr); 3302 ierr = PetscMalloc(psum*sizeof(PetscInt),&temp_idxs[0]);CHKERRQ(ierr); 3303 for (i=1;i<nis;i++) { 3304 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3305 } 3306 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3307 ptr_idxs = recv_buffer_idxs_is; 3308 for (i=0;i<n_recvs;i++) { 3309 for (j=0;j<nis;j++) { 3310 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3311 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3312 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3313 ptr_idxs += plen+1; /* shift pointer to received data */ 3314 } 3315 } 3316 for (i=0;i<nis;i++) { 3317 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3318 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3319 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3320 } 3321 ierr = PetscFree(count_is);CHKERRQ(ierr); 3322 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3323 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3324 } 3325 /* free workspace */ 3326 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3327 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3328 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3329 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3330 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3331 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3332 if (isdense) { 3333 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3334 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3335 } else { 3336 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3337 } 3338 if (nis) { 3339 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3340 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3341 } 3342 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3343 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3344 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3345 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3346 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3347 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3348 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3349 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3350 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3351 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3352 ierr = PetscFree(onodes);CHKERRQ(ierr); 3353 if (nis) { 3354 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3355 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3356 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3357 } 3358 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3359 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3360 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3361 for (i=0;i<nis;i++) { 3362 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3363 } 3364 } 3365 PetscFunctionReturn(0); 3366 } 3367 3368 /* temporary hack into ksp private data structure */ 3369 #include <petsc-private/kspimpl.h> 3370 3371 #undef __FUNCT__ 3372 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3373 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3374 { 3375 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3376 PC_IS *pcis = (PC_IS*)pc->data; 3377 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3378 MatNullSpace CoarseNullSpace=NULL; 3379 ISLocalToGlobalMapping coarse_islg; 3380 IS coarse_is,*isarray; 3381 PetscInt i,im_active=-1,active_procs=-1; 3382 PetscInt nis,nisdofs,nisneu; 3383 PC pc_temp; 3384 PCType coarse_pc_type; 3385 KSPType coarse_ksp_type; 3386 PetscBool multilevel_requested,multilevel_allowed; 3387 PetscBool setsym,issym,isherm,isbddc,isnn,coarse_reuse; 3388 MatStructure matstruct; 3389 Mat t_coarse_mat_is; 3390 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3391 PetscMPIInt all_procs; 3392 PetscBool csin_ml,csin_ds,csin,csin_type_simple; 3393 PetscErrorCode ierr; 3394 3395 PetscFunctionBegin; 3396 /* Assign global numbering to coarse dofs */ 3397 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3398 PetscInt ocoarse_size; 3399 ocoarse_size = pcbddc->coarse_size; 3400 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3401 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3402 /* see if we can avoid some work */ 3403 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3404 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3405 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3406 coarse_reuse = PETSC_FALSE; 3407 } else { /* we can safely reuse already computed coarse matrix */ 3408 coarse_reuse = PETSC_TRUE; 3409 } 3410 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3411 coarse_reuse = PETSC_FALSE; 3412 } 3413 /* reset any subassembling information */ 3414 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3415 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3416 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3417 coarse_reuse = PETSC_TRUE; 3418 } 3419 3420 /* count "active" (i.e. with positive local size) and "void" processes */ 3421 im_active = !!(pcis->n); 3422 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3423 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3424 void_procs = all_procs-active_procs; 3425 csin_type_simple = PETSC_TRUE; 3426 if (pcbddc->current_level) { 3427 csin_ml = PETSC_TRUE; 3428 ncoarse_ml = void_procs; 3429 csin_ds = PETSC_TRUE; 3430 ncoarse_ds = void_procs; 3431 if (!void_procs) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 3432 } else { 3433 csin_ml = PETSC_FALSE; 3434 ncoarse_ml = all_procs; 3435 if (void_procs) { 3436 csin_ds = PETSC_TRUE; 3437 ncoarse_ds = void_procs; 3438 csin_type_simple = PETSC_FALSE; 3439 } else { 3440 csin_ds = PETSC_FALSE; 3441 ncoarse_ds = all_procs; 3442 } 3443 } 3444 3445 /* 3446 test if we can go multilevel: three conditions must be satisfied: 3447 - we have not exceeded the number of levels requested 3448 - we can actually subassemble the active processes 3449 - we can find a suitable number of MPI processes where we can place the subassembled problem 3450 */ 3451 multilevel_allowed = PETSC_FALSE; 3452 multilevel_requested = PETSC_FALSE; 3453 if (pcbddc->current_level < pcbddc->max_levels) { 3454 multilevel_requested = PETSC_TRUE; 3455 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3456 multilevel_allowed = PETSC_FALSE; 3457 } else { 3458 multilevel_allowed = PETSC_TRUE; 3459 } 3460 } 3461 /* determine number of process partecipating to coarse solver */ 3462 if (multilevel_allowed) { 3463 ncoarse = ncoarse_ml; 3464 csin = csin_ml; 3465 } else { 3466 ncoarse = ncoarse_ds; 3467 csin = csin_ds; 3468 } 3469 3470 /* creates temporary l2gmap and IS for coarse indexes */ 3471 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3472 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3473 3474 /* creates temporary MATIS object for coarse matrix */ 3475 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3476 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3477 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3478 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3479 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3480 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3481 3482 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3483 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3484 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3485 const PetscInt *idxs; 3486 ISLocalToGlobalMapping tmap; 3487 3488 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3489 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3490 /* allocate space for temporary storage */ 3491 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr); 3492 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr); 3493 /* allocate for IS array */ 3494 nisdofs = pcbddc->n_ISForDofsLocal; 3495 nisneu = !!pcbddc->NeumannBoundariesLocal; 3496 nis = nisdofs + nisneu; 3497 ierr = PetscMalloc(nis*sizeof(IS),&isarray);CHKERRQ(ierr); 3498 /* dofs splitting */ 3499 for (i=0;i<nisdofs;i++) { 3500 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3501 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3502 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3503 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3504 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3505 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3506 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3507 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3508 } 3509 /* neumann boundaries */ 3510 if (pcbddc->NeumannBoundariesLocal) { 3511 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3512 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3513 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3514 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3515 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3516 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3517 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3518 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3519 } 3520 /* free memory */ 3521 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3522 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3523 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3524 } else { 3525 nis = 0; 3526 nisdofs = 0; 3527 nisneu = 0; 3528 isarray = NULL; 3529 } 3530 /* destroy no longer needed map */ 3531 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3532 3533 /* restrict on coarse candidates (if needed) */ 3534 coarse_mat_is = NULL; 3535 if (csin) { 3536 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3537 PetscInt j,tissize,*nisindices; 3538 PetscInt *coarse_candidates; 3539 const PetscInt* tisindices; 3540 /* get coarse candidates' ranks in pc communicator */ 3541 ierr = PetscMalloc(all_procs*sizeof(PetscInt),&coarse_candidates);CHKERRQ(ierr); 3542 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3543 for (i=0,j=0;i<all_procs;i++) { 3544 if (!coarse_candidates[i]) { 3545 coarse_candidates[j]=i; 3546 j++; 3547 } 3548 } 3549 if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse); 3550 /* get a suitable subassembling pattern */ 3551 if (csin_type_simple) { 3552 PetscMPIInt rank; 3553 PetscInt issize,isidx; 3554 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3555 if (im_active) { 3556 issize = 1; 3557 isidx = (PetscInt)rank; 3558 } else { 3559 issize = 0; 3560 isidx = -1; 3561 } 3562 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3563 } else { 3564 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3565 } 3566 if (pcbddc->dbg_flag) { 3567 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3568 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 3569 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 3571 for (i=0;i<j;i++) { 3572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 3573 } 3574 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 3575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3576 } 3577 /* shift the pattern on coarse candidates */ 3578 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 3579 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3580 ierr = PetscMalloc(tissize*sizeof(PetscInt),&nisindices);CHKERRQ(ierr); 3581 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 3582 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 3583 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 3584 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 3585 } 3586 if (pcbddc->dbg_flag) { 3587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 3589 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 3590 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3591 } 3592 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 3593 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 3594 } else { 3595 if (pcbddc->dbg_flag) { 3596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3597 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 3598 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3599 } 3600 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 3601 coarse_mat_is = t_coarse_mat_is; 3602 } 3603 3604 /* create local to global scatters for coarse problem */ 3605 if (pcbddc->new_primal_space) { 3606 PetscInt lrows; 3607 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3608 if (coarse_mat_is) { 3609 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 3610 } else { 3611 lrows = 0; 3612 } 3613 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 3614 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 3615 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 3616 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3617 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3618 } 3619 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3620 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 3621 3622 /* set defaults for coarse KSP and PC */ 3623 if (multilevel_allowed) { 3624 coarse_ksp_type = KSPRICHARDSON; 3625 coarse_pc_type = PCBDDC; 3626 } else { 3627 coarse_ksp_type = KSPPREONLY; 3628 coarse_pc_type = PCREDUNDANT; 3629 } 3630 3631 /* print some info if requested */ 3632 if (pcbddc->dbg_flag) { 3633 if (!multilevel_allowed) { 3634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3635 if (multilevel_requested) { 3636 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3637 } else if (pcbddc->max_levels) { 3638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3639 } 3640 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3641 } 3642 } 3643 3644 /* create the coarse KSP object only once with defaults */ 3645 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3646 issym = PETSC_FALSE; 3647 isherm = PETSC_FALSE; 3648 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3649 ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr); 3650 if (coarse_mat_is) { 3651 MatReuse coarse_mat_reuse; 3652 PetscViewer dbg_viewer; 3653 if (pcbddc->dbg_flag) { 3654 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 3655 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3656 } 3657 if (!pcbddc->coarse_ksp) { 3658 char prefix[256],str_level[16]; 3659 size_t len; 3660 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3661 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3662 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3663 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is,matstruct);CHKERRQ(ierr); 3664 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3665 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3666 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3667 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3668 /* prefix */ 3669 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3670 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3671 if (!pcbddc->current_level) { 3672 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3673 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3674 } else { 3675 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3676 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 3677 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 3678 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3679 *(prefix+len)='\0'; 3680 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 3681 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3682 } 3683 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3684 /* allow user customization */ 3685 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3686 } 3687 3688 /* get some info after set from options */ 3689 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3690 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3691 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3692 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 3693 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3694 isbddc = PETSC_FALSE; 3695 } 3696 3697 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 3698 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3699 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3700 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3701 if (nisdofs) { 3702 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 3703 for (i=0;i<nisdofs;i++) { 3704 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3705 } 3706 } 3707 if (nisneu) { 3708 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 3709 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 3710 } 3711 3712 /* assemble coarse matrix */ 3713 if (coarse_reuse) { 3714 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3715 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3716 coarse_mat_reuse = MAT_REUSE_MATRIX; 3717 } else { 3718 coarse_mat_reuse = MAT_INITIAL_MATRIX; 3719 } 3720 if (isbddc || isnn) { 3721 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 3722 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3723 if (pcbddc->dbg_flag) { 3724 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3725 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 3726 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 3727 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3728 } 3729 } 3730 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 3731 } else { 3732 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 3733 } 3734 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3735 3736 /* propagate symmetry info to coarse matrix */ 3737 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3738 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 3739 3740 /* set operators */ 3741 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3742 if (pcbddc->dbg_flag) { 3743 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 3744 } 3745 } else { /* processes non partecipating to coarse solver (if any) */ 3746 coarse_mat = 0; 3747 } 3748 ierr = PetscFree(isarray);CHKERRQ(ierr); 3749 3750 /* temporary disabled code */ 3751 #if 0 3752 /* Compute coarse null space (special handling by BDDC only) */ 3753 if (pcbddc->NullSpace) { 3754 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3755 if (isbddc) { 3756 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3757 } else { 3758 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3759 } 3760 } 3761 #endif 3762 3763 if (pcbddc->coarse_ksp) { 3764 Vec crhs,csol; 3765 PetscBool ispreonly; 3766 /* setup coarse ksp */ 3767 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3768 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 3769 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 3770 /* hack */ 3771 if (!csol) { 3772 ierr = MatGetVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 3773 } 3774 if (!crhs) { 3775 ierr = MatGetVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 3776 } 3777 /* Check coarse problem if in debug mode or if solving with an iterative method */ 3778 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3779 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 3780 KSP check_ksp; 3781 KSPType check_ksp_type; 3782 PC check_pc; 3783 Vec check_vec,coarse_vec; 3784 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3785 PetscInt its; 3786 PetscBool compute_eigs; 3787 PetscReal *eigs_r,*eigs_c; 3788 PetscInt neigs; 3789 3790 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3791 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 3792 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3793 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3794 if (ispreonly) { 3795 check_ksp_type = KSPPREONLY; 3796 compute_eigs = PETSC_FALSE; 3797 } else { 3798 check_ksp_type = KSPGMRES; 3799 compute_eigs = PETSC_TRUE; 3800 } 3801 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3802 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 3803 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 3804 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 3805 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3806 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3807 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3808 /* create random vec */ 3809 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 3810 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 3811 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3812 if (CoarseNullSpace) { 3813 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3814 } 3815 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3816 /* solve coarse problem */ 3817 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 3818 if (CoarseNullSpace) { 3819 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 3820 } 3821 /* set eigenvalue estimation if preonly has not been requested */ 3822 if (compute_eigs) { 3823 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_r);CHKERRQ(ierr); 3824 ierr = PetscMalloc((pcbddc->coarse_size+1)*sizeof(PetscReal),&eigs_c);CHKERRQ(ierr); 3825 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 3826 lambda_max = eigs_r[neigs-1]; 3827 lambda_min = eigs_r[0]; 3828 if (pcbddc->use_coarse_estimates) { 3829 if (lambda_max>lambda_min) { 3830 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 3831 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 3832 } 3833 } 3834 } 3835 3836 /* check coarse problem residual error */ 3837 if (pcbddc->dbg_flag) { 3838 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 3839 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3840 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 3841 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3842 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 3843 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3844 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3845 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 3846 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 3847 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 3848 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3849 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3850 if (compute_eigs) { 3851 PetscReal lambda_max_s,lambda_min_s; 3852 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3853 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 3854 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 3855 for (i=0;i<neigs;i++) { 3856 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 3857 } 3858 } 3859 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 3860 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 3861 } 3862 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3863 if (compute_eigs) { 3864 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 3865 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 3866 } 3867 } 3868 } 3869 /* print additional info */ 3870 if (pcbddc->dbg_flag) { 3871 /* waits until all processes reaches this point */ 3872 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 3873 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3874 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3875 } 3876 3877 /* free memory */ 3878 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3879 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3880 PetscFunctionReturn(0); 3881 } 3882 3883 #undef __FUNCT__ 3884 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3885 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3886 { 3887 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3888 PC_IS* pcis = (PC_IS*)pc->data; 3889 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3890 PetscInt i,coarse_size; 3891 PetscInt *local_primal_indices; 3892 PetscErrorCode ierr; 3893 3894 PetscFunctionBegin; 3895 /* Compute global number of coarse dofs */ 3896 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 3897 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 3898 } 3899 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3900 3901 /* check numbering */ 3902 if (pcbddc->dbg_flag) { 3903 PetscScalar coarsesum,*array; 3904 3905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3906 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3907 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3908 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3909 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3910 for (i=0;i<pcbddc->local_primal_size;i++) { 3911 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3912 } 3913 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3914 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3915 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3916 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3917 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3918 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3919 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3920 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3921 for (i=0;i<pcis->n;i++) { 3922 if (array[i] == 1.0) { 3923 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3924 } 3925 } 3926 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3927 for (i=0;i<pcis->n;i++) { 3928 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3929 } 3930 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3931 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3932 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3933 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3934 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3935 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3936 if (pcbddc->dbg_flag > 1) { 3937 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3938 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3940 for (i=0;i<pcbddc->local_primal_size;i++) { 3941 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3942 } 3943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3944 } 3945 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3946 } 3947 /* get back data */ 3948 *coarse_size_n = coarse_size; 3949 *local_primal_indices_n = local_primal_indices; 3950 PetscFunctionReturn(0); 3951 } 3952 3953 #undef __FUNCT__ 3954 #define __FUNCT__ "PCBDDCGlobalToLocal" 3955 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis) 3956 { 3957 PC_IS* pcis = (PC_IS*)pc->data; 3958 IS localis_t; 3959 PetscInt i,lsize,*idxs; 3960 PetscScalar *vals; 3961 PetscErrorCode ierr; 3962 3963 PetscFunctionBegin; 3964 /* get dirichlet indices in local ordering exploiting local to global map */ 3965 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 3966 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3967 for (i=0;i<lsize;i++) vals[i] = 1.0; 3968 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3969 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3970 if (idxs) { /* multilevel guard */ 3971 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3972 } 3973 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 3974 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 3975 ierr = PetscFree(vals);CHKERRQ(ierr); 3976 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 3977 /* now compute dirichlet set in local ordering */ 3978 ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3979 ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3980 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3981 for (i=0,lsize=0;i<pcis->n;i++) { 3982 if (vals[i] == 1.0) { 3983 lsize++; 3984 } 3985 } 3986 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 3987 for (i=0,lsize=0;i<pcis->n;i++) { 3988 if (vals[i] == 1.0) { 3989 idxs[lsize++] = i; 3990 } 3991 } 3992 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 3993 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 3994 *localis = localis_t; 3995 PetscFunctionReturn(0); 3996 } 3997