1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Allocate needed vectors */ 18 ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr); 19 20 /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */ 21 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 22 23 /* Setup local solvers ksp_D and ksp_R */ 24 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 25 26 /* Change global null space passed in by the user if change of basis has been requested */ 27 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 28 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 29 } 30 31 /* 32 Setup local correction and local part of coarse basis. 33 Gives back the dense local part of the coarse matrix in column major ordering 34 */ 35 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 36 37 /* Compute total number of coarse nodes and setup coarse solver */ 38 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 39 40 /* free */ 41 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCBDDCResetCustomization" 47 PetscErrorCode PCBDDCResetCustomization(PC pc) 48 { 49 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 50 PetscInt i; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 55 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 56 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 58 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 59 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 60 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 61 for (i=0;i<pcbddc->n_ISForDofs;i++) { 62 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 63 } 64 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 65 pcbddc->n_ISForDofs = 0; 66 PetscFunctionReturn(0); 67 } 68 69 #undef __FUNCT__ 70 #define __FUNCT__ "PCBDDCResetTopography" 71 PetscErrorCode PCBDDCResetTopography(PC pc) 72 { 73 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 74 PetscErrorCode ierr; 75 76 PetscFunctionBegin; 77 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 78 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 79 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 80 PetscFunctionReturn(0); 81 } 82 83 #undef __FUNCT__ 84 #define __FUNCT__ "PCBDDCResetSolvers" 85 PetscErrorCode PCBDDCResetSolvers(PC pc) 86 { 87 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 92 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 96 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 98 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 100 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 102 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 103 ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr); 104 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 106 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 107 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "PCBDDCCreateWorkVectors" 113 PetscErrorCode PCBDDCCreateWorkVectors(PC pc) 114 { 115 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 116 PC_IS *pcis = (PC_IS*)pc->data; 117 VecType impVecType; 118 PetscInt n_vertices,n_constraints,local_primal_size,n_R; 119 PetscErrorCode ierr; 120 121 PetscFunctionBegin; 122 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 123 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr); 124 local_primal_size = n_constraints+n_vertices; 125 n_R = pcis->n-n_vertices; 126 /* local work vectors */ 127 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 128 ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr); 129 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 130 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 131 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 132 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 133 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 134 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 135 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 136 if (n_constraints) { 137 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 138 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 139 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 140 } 141 PetscFunctionReturn(0); 142 } 143 144 #undef __FUNCT__ 145 #define __FUNCT__ "PCBDDCSetUpCorrection" 146 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 147 { 148 PetscErrorCode ierr; 149 /* pointers to pcis and pcbddc */ 150 PC_IS* pcis = (PC_IS*)pc->data; 151 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 152 /* submatrices of local problem */ 153 Mat A_RV,A_VR,A_VV; 154 /* working matrices */ 155 Mat M1,M2,M3,C_CR; 156 /* working vectors */ 157 Vec vec1_C,vec2_C,vec1_V,vec2_V; 158 /* additional working stuff */ 159 IS is_aux; 160 ISLocalToGlobalMapping BtoNmap; 161 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 162 const PetscScalar *array,*row_cmat_values; 163 const PetscInt *row_cmat_indices,*idx_R_local; 164 PetscInt *vertices,*idx_V_B,*auxindices; 165 PetscInt n_vertices,n_constraints,size_of_constraint; 166 PetscInt i,j,n_R,n_D,n_B; 167 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 168 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 169 MatType impMatType; 170 /* some shortcuts to scalars */ 171 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 172 /* for debugging purposes */ 173 PetscReal *coarsefunctions_errors,*constraints_errors; 174 175 PetscFunctionBegin; 176 /* get number of vertices and their local indices */ 177 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 178 n_constraints = pcbddc->local_primal_size-n_vertices; 179 /* Set Non-overlapping dimensions */ 180 n_B = pcis->n_B; n_D = pcis->n - n_B; 181 n_R = pcis->n-n_vertices; 182 183 /* Set types for local objects needed by BDDC precondtioner */ 184 impMatType = MATSEQDENSE; 185 186 /* Allocating some extra storage just to be safe */ 187 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 188 for (i=0;i<pcis->n;i++) auxindices[i]=i; 189 190 /* vertices in boundary numbering */ 191 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 192 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr); 193 ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 194 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 195 if (i != n_vertices) { 196 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 197 } 198 199 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 200 if (n_constraints) { 201 /* work vectors */ 202 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 203 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 204 /* auxiliary matrices */ 205 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 206 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 207 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 208 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 209 210 /* Extract constraints on R nodes: C_{CR} */ 211 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 212 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 213 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 214 215 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 216 for (i=0;i<n_constraints;i++) { 217 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 218 /* Get row of constraint matrix in R numbering */ 219 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 220 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 221 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 222 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 223 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 224 /* Solve for row of constraint matrix in R numbering */ 225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 226 /* Set values in local_auxmat2 */ 227 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 228 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 229 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 230 } 231 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 232 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 234 235 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 236 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 237 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 238 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 239 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 240 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 241 ierr = MatSetUp(M1);CHKERRQ(ierr); 242 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 243 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 244 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 245 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 246 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 247 ierr = MatDestroy(&M2);CHKERRQ(ierr); 248 ierr = MatDestroy(&M3);CHKERRQ(ierr); 249 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 250 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 251 } 252 253 /* Get submatrices from subdomain matrix */ 254 if (n_vertices) { 255 PetscInt ibs,mbs; 256 PetscBool issbaij; 257 Mat newmat; 258 259 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 260 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 261 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 262 if (ibs != mbs) { /* need to convert to SEQAIJ */ 263 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 264 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 265 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 266 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 267 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 268 } else { 269 /* this is safe */ 270 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 271 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 272 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 273 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 274 /* which of the two approaches is faster? */ 275 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 276 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 277 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 278 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 279 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 280 } else { 281 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 282 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 283 } 284 } 285 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 286 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 287 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 288 } 289 290 /* Matrix of coarse basis functions (local) */ 291 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 292 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 293 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 294 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 295 if (pcbddc->switch_static || pcbddc->dbg_flag) { 296 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 297 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 298 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 299 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 300 } 301 302 if (pcbddc->dbg_flag) { 303 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 304 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 305 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 306 } 307 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 308 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 309 310 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 311 312 /* vertices */ 313 for (i=0;i<n_vertices;i++) { 314 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 315 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 316 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 317 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 318 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 319 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 320 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 321 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 322 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 323 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 324 if (n_constraints) { 325 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 326 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 327 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 328 } 329 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 330 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 331 332 /* Set values in coarse basis function and subdomain part of coarse_mat */ 333 /* coarse basis functions */ 334 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 335 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 336 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 337 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 338 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 339 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 340 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 341 if (pcbddc->switch_static || pcbddc->dbg_flag) { 342 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 343 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 344 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 345 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 346 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 347 } 348 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 349 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 350 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 352 if (n_constraints) { 353 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 354 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 355 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 356 } 357 358 /* check */ 359 if (pcbddc->dbg_flag) { 360 /* assemble subdomain vector on local nodes */ 361 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 362 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 363 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 364 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 365 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 366 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 367 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 368 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 369 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 370 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 371 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 372 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 373 if (n_constraints) { 374 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 375 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 377 } 378 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 380 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 381 /* check saddle point solution */ 382 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 383 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 384 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 385 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 386 /* shift by the identity matrix */ 387 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 388 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 389 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 390 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 391 } 392 } 393 394 /* constraints */ 395 for (i=0;i<n_constraints;i++) { 396 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 397 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 398 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 399 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 400 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 401 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 402 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 403 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 404 if (n_vertices) { 405 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 406 } 407 /* Set values in coarse basis function and subdomain part of coarse_mat */ 408 /* coarse basis functions */ 409 j = i+n_vertices; /* don't touch this! */ 410 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 411 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 412 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 413 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 414 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 416 if (pcbddc->switch_static || pcbddc->dbg_flag) { 417 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 418 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 419 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 420 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 422 } 423 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 424 if (n_vertices) { 425 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 426 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 427 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 428 } 429 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 430 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 431 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 432 433 if (pcbddc->dbg_flag) { 434 /* assemble subdomain vector on nodes */ 435 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 436 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 437 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 438 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 439 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 440 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 441 /* assemble subdomain vector of lagrange multipliers */ 442 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 443 if (n_vertices) { 444 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 445 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 446 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 447 } 448 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 449 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 450 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 451 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 452 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 453 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 454 /* check saddle point solution */ 455 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 456 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 457 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 458 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 459 /* shift by the identity matrix */ 460 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 461 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 462 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 463 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 464 } 465 } 466 /* call assembling routines for local coarse basis */ 467 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 469 if (pcbddc->switch_static || pcbddc->dbg_flag) { 470 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 472 } 473 474 /* compute other basis functions for non-symmetric problems */ 475 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 476 if (!setsym || (setsym && !issym)) { 477 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 478 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 479 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 480 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 481 if (pcbddc->switch_static || pcbddc->dbg_flag) { 482 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 483 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 484 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 485 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 486 } 487 for (i=0;i<pcbddc->local_primal_size;i++) { 488 if (n_constraints) { 489 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 490 for (j=0;j<n_constraints;j++) { 491 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 492 } 493 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 494 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 495 } 496 if (i<n_vertices) { 497 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 498 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 501 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 502 if (n_constraints) { 503 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 504 } 505 } else { 506 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 507 } 508 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 509 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 510 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 511 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 512 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 513 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 514 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 515 if (i<n_vertices) { 516 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 517 } 518 if (pcbddc->switch_static || pcbddc->dbg_flag) { 519 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 520 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 521 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 522 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 523 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 524 } 525 526 if (pcbddc->dbg_flag) { 527 /* assemble subdomain vector on nodes */ 528 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 529 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 530 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 531 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 532 if (i<n_vertices) { 533 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 534 } 535 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 536 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 537 /* assemble subdomain vector of lagrange multipliers */ 538 for (j=0;j<pcbddc->local_primal_size;j++) { 539 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 540 } 541 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 542 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 543 /* check saddle point solution */ 544 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 545 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 546 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 547 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 548 /* shift by the identity matrix */ 549 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 550 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 551 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 552 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 553 } 554 } 555 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 556 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 558 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 } 561 } 562 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 563 /* Checking coarse_sub_mat and coarse basis functios */ 564 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 565 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 566 if (pcbddc->dbg_flag) { 567 Mat coarse_sub_mat; 568 Mat AUXMAT,TM1,TM2,TM3,TM4; 569 Mat coarse_phi_D,coarse_phi_B; 570 Mat coarse_psi_D,coarse_psi_B; 571 Mat A_II,A_BB,A_IB,A_BI; 572 MatType checkmattype=MATSEQAIJ; 573 PetscReal real_value; 574 575 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 576 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 577 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 578 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 579 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 580 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 581 if (pcbddc->coarse_psi_B) { 582 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 583 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 584 } 585 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 586 587 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 590 if (pcbddc->coarse_psi_B) { 591 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 592 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 593 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 594 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 595 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 596 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 597 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 598 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 599 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 600 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 601 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 602 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 603 } else { 604 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 605 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 606 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 607 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 608 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 609 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 610 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 611 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 612 } 613 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 614 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 615 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 616 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 617 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 618 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 623 for (i=0;i<pcbddc->local_primal_size;i++) { 624 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 625 } 626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 627 for (i=0;i<pcbddc->local_primal_size;i++) { 628 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 629 } 630 if (pcbddc->coarse_psi_B) { 631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 632 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 634 } 635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 636 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 637 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 638 } 639 } 640 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 641 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 642 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 643 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 644 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 645 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 646 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 647 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 648 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 649 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 650 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 651 if (pcbddc->coarse_psi_B) { 652 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 653 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 654 } 655 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 656 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 657 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 658 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 659 } 660 /* free memory */ 661 if (n_vertices) { 662 ierr = PetscFree(vertices);CHKERRQ(ierr); 663 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 664 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 665 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 666 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 667 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 668 } 669 if (n_constraints) { 670 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 671 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 672 ierr = MatDestroy(&M1);CHKERRQ(ierr); 673 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 674 } 675 ierr = PetscFree(auxindices);CHKERRQ(ierr); 676 /* get back data */ 677 *coarse_submat_vals_n = coarse_submat_vals; 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNCT__ 682 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 683 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 684 { 685 PC_IS* pcis = (PC_IS*)(pc->data); 686 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 687 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 688 PetscBool issbaij,isbaij; 689 /* manage repeated solves */ 690 MatReuse reuse; 691 MatStructure matstruct; 692 PetscErrorCode ierr; 693 694 PetscFunctionBegin; 695 /* get mat flags */ 696 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 697 reuse = MAT_INITIAL_MATRIX; 698 if (pc->setupcalled) { 699 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 700 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 701 if (matstruct == SAME_NONZERO_PATTERN) { 702 reuse = MAT_REUSE_MATRIX; 703 } else { 704 reuse = MAT_INITIAL_MATRIX; 705 } 706 } 707 if (reuse == MAT_INITIAL_MATRIX) { 708 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 709 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 710 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 711 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 712 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 713 } 714 715 /* transform local matrices if needed */ 716 if (pcbddc->use_change_of_basis) { 717 Mat change_mat_all; 718 PetscScalar *row_cmat_values; 719 PetscInt *row_cmat_indices; 720 PetscInt *nnz,*is_indices,*temp_indices; 721 PetscInt i,j,k,n_D,n_B; 722 723 /* Get Non-overlapping dimensions */ 724 n_B = pcis->n_B; 725 n_D = pcis->n-n_B; 726 727 /* compute nonzero structure of change of basis on all local nodes */ 728 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 729 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 730 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 731 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 732 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 733 k=1; 734 for (i=0;i<n_B;i++) { 735 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 736 nnz[is_indices[i]]=j; 737 if (k < j) k = j; 738 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 739 } 740 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 741 /* assemble change of basis matrix on the whole set of local dofs */ 742 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 743 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 744 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 745 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 746 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 747 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 748 for (i=0;i<n_D;i++) { 749 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 750 } 751 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 752 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 753 for (i=0;i<n_B;i++) { 754 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 755 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 756 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 757 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 758 } 759 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 760 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 761 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 762 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 763 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 764 if (!issbaij && !isbaij) { 765 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 766 } else { 767 Mat work_mat; 768 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 769 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 770 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 771 } 772 /* 773 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 774 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 775 */ 776 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 777 ierr = PetscFree(nnz);CHKERRQ(ierr); 778 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 779 } else { 780 /* without change of basis, the local matrix is unchanged */ 781 if (!pcbddc->local_mat) { 782 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 783 pcbddc->local_mat = matis->A; 784 } 785 } 786 787 /* get submatrices */ 788 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 789 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 790 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 791 if (!issbaij) { 792 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 793 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 794 } else { 795 Mat newmat; 796 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 797 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 798 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 799 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 800 } 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNCT__ 805 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 806 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 807 { 808 PC_IS* pcis = (PC_IS*)(pc->data); 809 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 810 IS is_aux1,is_aux2; 811 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 812 PetscInt n_vertices,i,j,n_R,n_D,n_B; 813 PetscInt vbs,bs; 814 PetscBT bitmask; 815 PetscErrorCode ierr; 816 817 PetscFunctionBegin; 818 /* Set Non-overlapping dimensions */ 819 n_B = pcis->n_B; n_D = pcis->n - n_B; 820 /* get vertex indices from constraint matrix */ 821 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 822 823 /* create auxiliary bitmask */ 824 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 825 for (i=0;i<n_vertices;i++) { 826 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 827 } 828 829 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 830 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 831 for (i=0, n_R=0; i<pcis->n; i++) { 832 if (!PetscBTLookup(bitmask,i)) { 833 idx_R_local[n_R] = i; 834 n_R++; 835 } 836 } 837 838 /* Block code */ 839 vbs = 1; 840 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 841 if (bs>1 && !(n_vertices%bs)) { 842 PetscBool is_blocked = PETSC_TRUE; 843 PetscInt *vary; 844 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 845 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 846 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 847 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 848 for (i=0; i<n_vertices; i++) { 849 if (vary[i]!=0 && vary[i]!=bs) { 850 is_blocked = PETSC_FALSE; 851 break; 852 } 853 } 854 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 855 vbs = bs; 856 for (i=0;i<n_R/vbs;i++) { 857 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 858 } 859 } 860 ierr = PetscFree(vary);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(vertices);CHKERRQ(ierr); 863 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 864 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 865 866 /* print some info if requested */ 867 if (pcbddc->dbg_flag) { 868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 869 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 870 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 872 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 874 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 875 } 876 877 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 878 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 879 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 880 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 881 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 882 for (i=0; i<n_D; i++) { 883 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 884 } 885 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 886 for (i=0, j=0; i<n_R; i++) { 887 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 888 aux_array1[j++] = i; 889 } 890 } 891 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 892 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 893 for (i=0, j=0; i<n_B; i++) { 894 if (!PetscBTLookup(bitmask,is_indices[i])) { 895 aux_array2[j++] = i; 896 } 897 } 898 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 899 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 900 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 901 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 902 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 903 904 if (pcbddc->switch_static || pcbddc->dbg_flag) { 905 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 906 for (i=0, j=0; i<n_R; i++) { 907 if (PetscBTLookup(bitmask,idx_R_local[i])) { 908 aux_array1[j++] = i; 909 } 910 } 911 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 912 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 913 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 914 } 915 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 916 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 917 PetscFunctionReturn(0); 918 } 919 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 923 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 924 { 925 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 926 PC_IS *pcis = (PC_IS*)pc->data; 927 PC pc_temp; 928 Mat A_RR; 929 Vec vec1,vec2,vec3; 930 MatStructure matstruct; 931 PetscScalar m_one = -1.0; 932 PetscReal value; 933 PetscInt n_D,n_R,ibs,mbs; 934 PetscBool use_exact,use_exact_reduced,issbaij; 935 PetscErrorCode ierr; 936 /* prefixes stuff */ 937 char dir_prefix[256],neu_prefix[256],str_level[3]; 938 size_t len; 939 940 PetscFunctionBegin; 941 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 942 943 /* compute prefixes */ 944 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 945 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 946 if (!pcbddc->current_level) { 947 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 948 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 949 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 950 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 951 } else { 952 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 953 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 954 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 955 len -= 15; /* remove "pc_bddc_coarse_" */ 956 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 957 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 958 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 959 *(dir_prefix+len)='\0'; 960 *(neu_prefix+len)='\0'; 961 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 962 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 963 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 964 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 965 } 966 967 /* DIRICHLET PROBLEM */ 968 /* Matrix for Dirichlet problem is pcis->A_II */ 969 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 970 if (!pcbddc->ksp_D) { /* create object if not yet build */ 971 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 972 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 973 /* default */ 974 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 975 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 976 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 977 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 978 if (issbaij) { 979 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 980 } else { 981 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 982 } 983 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 984 } 985 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 986 /* Allow user's customization */ 987 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 988 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 989 if (!n_D) { 990 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 991 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 992 } 993 /* Set Up KSP for Dirichlet problem of BDDC */ 994 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 995 /* set ksp_D into pcis data */ 996 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 997 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 998 pcis->ksp_D = pcbddc->ksp_D; 999 1000 /* NEUMANN PROBLEM */ 1001 /* Matrix for Neumann problem is A_RR -> we need to create it */ 1002 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1003 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1004 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1005 if (ibs != mbs) { 1006 Mat newmat; 1007 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1008 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1009 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1010 } else { 1011 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 1012 } 1013 if (!pcbddc->ksp_R) { /* create object if not yet build */ 1014 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1015 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1016 /* default */ 1017 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1018 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1019 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1020 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1021 if (issbaij) { 1022 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1023 } else { 1024 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1025 } 1026 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1027 } 1028 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1029 /* Allow user's customization */ 1030 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1031 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1032 if (!n_R) { 1033 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1034 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1035 } 1036 /* Set Up KSP for Neumann problem of BDDC */ 1037 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1038 1039 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1040 1041 /* Dirichlet */ 1042 ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr); 1043 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1044 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1045 ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr); 1046 ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr); 1047 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1048 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1049 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1050 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1051 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1052 /* need to be adapted? */ 1053 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1054 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1055 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1056 /* print info */ 1057 if (pcbddc->dbg_flag) { 1058 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1059 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1060 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1061 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1063 } 1064 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1065 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1066 } 1067 1068 /* Neumann */ 1069 ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr); 1070 ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr); 1071 ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr); 1072 ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr); 1073 ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr); 1074 ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr); 1075 ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr); 1076 ierr = VecDestroy(&vec1);CHKERRQ(ierr); 1077 ierr = VecDestroy(&vec2);CHKERRQ(ierr); 1078 ierr = VecDestroy(&vec3);CHKERRQ(ierr); 1079 /* need to be adapted? */ 1080 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1081 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1082 /* print info */ 1083 if (pcbddc->dbg_flag) { 1084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1086 } 1087 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1088 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1089 } 1090 /* free Neumann problem's matrix */ 1091 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1097 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1098 { 1099 PetscErrorCode ierr; 1100 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1101 1102 PetscFunctionBegin; 1103 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1104 if (pcbddc->local_auxmat1) { 1105 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1106 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1107 } 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1113 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1114 { 1115 PetscErrorCode ierr; 1116 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1117 PC_IS* pcis = (PC_IS*) (pc->data); 1118 const PetscScalar zero = 0.0; 1119 1120 PetscFunctionBegin; 1121 /* Application of PHI^T (or PSI^T) */ 1122 if (pcbddc->coarse_psi_B) { 1123 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1124 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1125 } else { 1126 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1127 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1128 } 1129 /* Scatter data of coarse_rhs */ 1130 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1131 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1132 1133 /* Local solution on R nodes */ 1134 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1135 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1136 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1137 if (pcbddc->switch_static) { 1138 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1139 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1140 } 1141 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1142 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1143 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 if (pcbddc->switch_static) { 1146 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1148 } 1149 1150 /* Coarse solution */ 1151 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1153 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1154 } 1155 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1156 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 1158 /* Sum contributions from two levels */ 1159 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1160 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1166 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1167 { 1168 PetscErrorCode ierr; 1169 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1170 1171 PetscFunctionBegin; 1172 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1178 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1179 { 1180 PetscErrorCode ierr; 1181 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1182 1183 PetscFunctionBegin; 1184 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 /* uncomment for testing purposes */ 1189 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1192 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1193 { 1194 PetscErrorCode ierr; 1195 PC_IS* pcis = (PC_IS*)(pc->data); 1196 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1197 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1198 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1199 MatType impMatType=MATSEQAIJ; 1200 /* one and zero */ 1201 PetscScalar one=1.0,zero=0.0; 1202 /* space to store constraints and their local indices */ 1203 PetscScalar *temp_quadrature_constraint; 1204 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1205 /* iterators */ 1206 PetscInt i,j,k,total_counts,temp_start_ptr; 1207 /* stuff to store connected components stored in pcbddc->mat_graph */ 1208 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1209 PetscInt n_ISForFaces,n_ISForEdges; 1210 /* near null space stuff */ 1211 MatNullSpace nearnullsp; 1212 const Vec *nearnullvecs; 1213 Vec *localnearnullsp; 1214 PetscBool nnsp_has_cnst; 1215 PetscInt nnsp_size; 1216 PetscScalar *array; 1217 /* BLAS integers */ 1218 PetscBLASInt lwork,lierr; 1219 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1220 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1221 /* LAPACK working arrays for SVD or POD */ 1222 PetscBool skip_lapack; 1223 PetscScalar *work; 1224 PetscReal *singular_vals; 1225 #if defined(PETSC_USE_COMPLEX) 1226 PetscReal *rwork; 1227 #endif 1228 #if defined(PETSC_MISSING_LAPACK_GESVD) 1229 PetscBLASInt Blas_one_2=1; 1230 PetscScalar *temp_basis,*correlation_mat; 1231 #else 1232 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1233 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1234 #endif 1235 /* change of basis */ 1236 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1237 PetscBool boolforchange,qr_needed; 1238 PetscBT touched,change_basis,qr_needed_idx; 1239 /* auxiliary stuff */ 1240 PetscInt *nnz,*is_indices,*local_to_B; 1241 /* some quantities */ 1242 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1243 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1244 1245 1246 PetscFunctionBegin; 1247 /* Get index sets for faces, edges and vertices from graph */ 1248 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1249 pcbddc->use_vertices = PETSC_TRUE; 1250 } 1251 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1252 /* HACK: provide functions to set change of basis */ 1253 if (!ISForVertices && pcbddc->NullSpace) { 1254 pcbddc->use_change_of_basis = PETSC_TRUE; 1255 pcbddc->use_change_on_faces = PETSC_FALSE; 1256 } 1257 /* print some info */ 1258 if (pcbddc->dbg_flag) { 1259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1260 i = 0; 1261 if (ISForVertices) { 1262 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1263 } 1264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1266 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1268 } 1269 /* check if near null space is attached to global mat */ 1270 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1271 if (nearnullsp) { 1272 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1273 } else { /* if near null space is not provided BDDC uses constants by default */ 1274 nnsp_size = 0; 1275 nnsp_has_cnst = PETSC_TRUE; 1276 } 1277 /* get max number of constraints on a single cc */ 1278 max_constraints = nnsp_size; 1279 if (nnsp_has_cnst) max_constraints++; 1280 1281 /* 1282 Evaluate maximum storage size needed by the procedure 1283 - temp_indices will contain start index of each constraint stored as follows 1284 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1285 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1286 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1287 */ 1288 total_counts = n_ISForFaces+n_ISForEdges; 1289 total_counts *= max_constraints; 1290 n_vertices = 0; 1291 if (ISForVertices) { 1292 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1293 } 1294 total_counts += n_vertices; 1295 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1296 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1297 total_counts = 0; 1298 max_size_of_constraint = 0; 1299 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1300 if (i<n_ISForEdges) { 1301 used_IS = &ISForEdges[i]; 1302 } else { 1303 used_IS = &ISForFaces[i-n_ISForEdges]; 1304 } 1305 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1306 total_counts += j; 1307 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1308 } 1309 total_counts *= max_constraints; 1310 total_counts += n_vertices; 1311 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1312 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1313 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1314 /* local to boundary numbering */ 1315 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr); 1316 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1317 for (i=0;i<pcis->n;i++) local_to_B[i]=-1; 1318 for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i; 1319 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1320 /* get local part of global near null space vectors */ 1321 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1322 for (k=0;k<nnsp_size;k++) { 1323 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1324 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1325 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 } 1327 1328 /* whether or not to skip lapack calls */ 1329 skip_lapack = PETSC_TRUE; 1330 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1331 1332 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1333 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1334 PetscScalar temp_work; 1335 #if defined(PETSC_MISSING_LAPACK_GESVD) 1336 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1337 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1338 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1339 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1340 #if defined(PETSC_USE_COMPLEX) 1341 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1342 #endif 1343 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1344 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1345 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1346 lwork = -1; 1347 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1348 #if !defined(PETSC_USE_COMPLEX) 1349 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1350 #else 1351 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1352 #endif 1353 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1354 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1355 #else /* on missing GESVD */ 1356 /* SVD */ 1357 PetscInt max_n,min_n; 1358 max_n = max_size_of_constraint; 1359 min_n = max_constraints; 1360 if (max_size_of_constraint < max_constraints) { 1361 min_n = max_size_of_constraint; 1362 max_n = max_constraints; 1363 } 1364 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1365 #if defined(PETSC_USE_COMPLEX) 1366 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1367 #endif 1368 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1369 lwork = -1; 1370 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1371 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1372 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1373 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1374 #if !defined(PETSC_USE_COMPLEX) 1375 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1376 #else 1377 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1378 #endif 1379 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1380 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1381 #endif /* on missing GESVD */ 1382 /* Allocate optimal workspace */ 1383 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1384 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1385 } 1386 /* Now we can loop on constraining sets */ 1387 total_counts = 0; 1388 temp_indices[0] = 0; 1389 /* vertices */ 1390 if (ISForVertices) { 1391 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1392 if (nnsp_has_cnst) { /* consider all vertices */ 1393 for (i=0;i<n_vertices;i++) { 1394 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1395 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1396 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1397 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1398 total_counts++; 1399 } 1400 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1401 PetscBool used_vertex; 1402 for (i=0;i<n_vertices;i++) { 1403 used_vertex = PETSC_FALSE; 1404 k = 0; 1405 while (!used_vertex && k<nnsp_size) { 1406 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1407 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1408 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1409 temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]]; 1410 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1411 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1412 total_counts++; 1413 used_vertex = PETSC_TRUE; 1414 } 1415 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1416 k++; 1417 } 1418 } 1419 } 1420 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1421 n_vertices = total_counts; 1422 } 1423 1424 /* edges and faces */ 1425 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1426 if (i<n_ISForEdges) { 1427 used_IS = &ISForEdges[i]; 1428 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1429 } else { 1430 used_IS = &ISForFaces[i-n_ISForEdges]; 1431 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1432 } 1433 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1434 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1435 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1436 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1437 /* change of basis should not be performed on local periodic nodes */ 1438 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1439 if (nnsp_has_cnst) { 1440 PetscScalar quad_value; 1441 temp_constraints++; 1442 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1443 for (j=0;j<size_of_constraint;j++) { 1444 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1445 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1446 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1447 } 1448 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1449 total_counts++; 1450 } 1451 for (k=0;k<nnsp_size;k++) { 1452 PetscReal real_value; 1453 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1454 for (j=0;j<size_of_constraint;j++) { 1455 temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j]; 1456 temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]]; 1457 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1458 } 1459 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1460 /* check if array is null on the connected component */ 1461 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1462 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1463 if (real_value > 0.0) { /* keep indices and values */ 1464 temp_constraints++; 1465 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1466 total_counts++; 1467 } 1468 } 1469 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1470 valid_constraints = temp_constraints; 1471 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1472 if (!pcbddc->use_nnsp_true && temp_constraints) { 1473 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1474 1475 #if defined(PETSC_MISSING_LAPACK_GESVD) 1476 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1477 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1478 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1479 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1480 from that computed using LAPACKgesvd 1481 -> This is due to a different computation of eigenvectors in LAPACKheev 1482 -> The quality of the POD-computed basis will be the same */ 1483 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1484 /* Store upper triangular part of correlation matrix */ 1485 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1486 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1487 for (j=0;j<temp_constraints;j++) { 1488 for (k=0;k<j+1;k++) { 1489 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1490 } 1491 } 1492 /* compute eigenvalues and eigenvectors of correlation matrix */ 1493 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1494 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1495 #if !defined(PETSC_USE_COMPLEX) 1496 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1497 #else 1498 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1499 #endif 1500 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1501 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1502 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1503 j=0; 1504 while (j < temp_constraints && singular_vals[j] < tol) j++; 1505 total_counts=total_counts-j; 1506 valid_constraints = temp_constraints-j; 1507 /* scale and copy POD basis into used quadrature memory */ 1508 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1509 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1510 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1512 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1514 if (j<temp_constraints) { 1515 PetscInt ii; 1516 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1518 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1519 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1520 for (k=0;k<temp_constraints-j;k++) { 1521 for (ii=0;ii<size_of_constraint;ii++) { 1522 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1523 } 1524 } 1525 } 1526 #else /* on missing GESVD */ 1527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1528 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1530 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1531 #if !defined(PETSC_USE_COMPLEX) 1532 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1533 #else 1534 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1535 #endif 1536 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1537 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1538 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1539 k = temp_constraints; 1540 if (k > size_of_constraint) k = size_of_constraint; 1541 j = 0; 1542 while (j < k && singular_vals[k-j-1] < tol) j++; 1543 total_counts = total_counts-temp_constraints+k-j; 1544 valid_constraints = k-j; 1545 #endif /* on missing GESVD */ 1546 } 1547 /* setting change_of_basis flag is safe now */ 1548 if (boolforchange) { 1549 for (j=0;j<valid_constraints;j++) { 1550 PetscBTSet(change_basis,total_counts-j-1); 1551 } 1552 } 1553 } 1554 /* free index sets of faces, edges and vertices */ 1555 for (i=0;i<n_ISForFaces;i++) { 1556 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1557 } 1558 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1559 for (i=0;i<n_ISForEdges;i++) { 1560 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1561 } 1562 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1563 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1564 1565 /* free workspace */ 1566 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1567 ierr = PetscFree(work);CHKERRQ(ierr); 1568 #if defined(PETSC_USE_COMPLEX) 1569 ierr = PetscFree(rwork);CHKERRQ(ierr); 1570 #endif 1571 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1572 #if defined(PETSC_MISSING_LAPACK_GESVD) 1573 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1574 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1575 #endif 1576 } 1577 for (k=0;k<nnsp_size;k++) { 1578 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1579 } 1580 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1581 1582 /* set quantities in pcbddc data structure */ 1583 /* n_vertices defines the number of subdomain corners in the primal space */ 1584 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1585 pcbddc->local_primal_size = total_counts; 1586 pcbddc->n_vertices = n_vertices; 1587 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1588 1589 /* Create constraint matrix */ 1590 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1591 /* so we need to set it up properly either with or without change of basis */ 1592 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1593 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1594 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1595 /* array to compute a local numbering of constraints : vertices first then constraints */ 1596 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1597 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1598 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1599 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1600 /* auxiliary stuff for basis change */ 1601 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1602 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1603 1604 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1605 total_primal_vertices=0; 1606 for (i=0;i<pcbddc->local_primal_size;i++) { 1607 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1608 if (size_of_constraint == 1) { 1609 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1610 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1611 aux_primal_minloc[total_primal_vertices]=0; 1612 total_primal_vertices++; 1613 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1614 PetscInt min_loc,min_index; 1615 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1616 /* find first untouched local node */ 1617 k = 0; 1618 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1619 min_index = global_indices[k]; 1620 min_loc = k; 1621 /* search the minimum among global nodes already untouched on the cc */ 1622 for (k=1;k<size_of_constraint;k++) { 1623 /* there can be more than one constraint on a single connected component */ 1624 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1625 min_index = global_indices[k]; 1626 min_loc = k; 1627 } 1628 } 1629 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1630 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1631 aux_primal_minloc[total_primal_vertices]=min_loc; 1632 total_primal_vertices++; 1633 } 1634 } 1635 /* determine if a QR strategy is needed for change of basis */ 1636 qr_needed = PETSC_FALSE; 1637 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1638 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1639 if (PetscBTLookup(change_basis,i)) { 1640 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1641 j = 0; 1642 for (k=0;k<size_of_constraint;k++) { 1643 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1644 j++; 1645 } 1646 } 1647 /* found more than one primal dof on the cc */ 1648 if (j > 1) { 1649 PetscBTSet(qr_needed_idx,i); 1650 qr_needed = PETSC_TRUE; 1651 } 1652 } 1653 } 1654 /* free workspace */ 1655 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1656 1657 /* permute indices in order to have a sorted set of vertices */ 1658 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1659 1660 /* nonzero structure of constraint matrix */ 1661 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1662 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1663 j=total_primal_vertices; 1664 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1665 if (!PetscBTLookup(change_basis,i)) { 1666 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1667 j++; 1668 } 1669 } 1670 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1671 ierr = PetscFree(nnz);CHKERRQ(ierr); 1672 /* set values in constraint matrix */ 1673 for (i=0;i<total_primal_vertices;i++) { 1674 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1675 } 1676 total_counts = total_primal_vertices; 1677 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1678 if (!PetscBTLookup(change_basis,i)) { 1679 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1680 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1681 total_counts++; 1682 } 1683 } 1684 /* assembling */ 1685 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1686 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1687 /* 1688 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1689 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1690 */ 1691 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1692 if (pcbddc->use_change_of_basis) { 1693 /* dual and primal dofs on a single cc */ 1694 PetscInt dual_dofs,primal_dofs; 1695 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1696 PetscInt primal_counter; 1697 /* working stuff for GEQRF */ 1698 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1699 PetscBLASInt lqr_work; 1700 /* working stuff for UNGQR */ 1701 PetscScalar *gqr_work,lgqr_work_t; 1702 PetscBLASInt lgqr_work; 1703 /* working stuff for TRTRS */ 1704 PetscScalar *trs_rhs; 1705 PetscBLASInt Blas_NRHS; 1706 /* pointers for values insertion into change of basis matrix */ 1707 PetscInt *start_rows,*start_cols; 1708 PetscScalar *start_vals; 1709 /* working stuff for values insertion */ 1710 PetscBT is_primal; 1711 1712 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1713 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1714 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1715 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1716 /* work arrays */ 1717 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1718 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1719 /* nonzeros per row */ 1720 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1721 if (PetscBTLookup(change_basis,i)) { 1722 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1723 if (PetscBTLookup(qr_needed_idx,i)) { 1724 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1725 } else { 1726 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1727 /* get local primal index on the cc */ 1728 j = 0; 1729 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1730 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1731 } 1732 } 1733 } 1734 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1735 ierr = PetscFree(nnz);CHKERRQ(ierr); 1736 /* Set initial identity in the matrix */ 1737 for (i=0;i<pcis->n_B;i++) { 1738 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1739 } 1740 1741 if (pcbddc->dbg_flag) { 1742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1744 } 1745 1746 1747 /* Now we loop on the constraints which need a change of basis */ 1748 /* 1749 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1750 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1751 1752 Basic blocks of change of basis matrix T computed 1753 1754 - Using the following block transformation if there is only a primal dof on the cc 1755 (in the example, primal dof is the last one of the edge in LOCAL ordering 1756 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1757 | 1 0 ... 0 1 | 1758 | 0 1 ... 0 1 | 1759 | ... | 1760 | 0 ... 1 1 | 1761 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1762 1763 - via QR decomposition of constraints otherwise 1764 */ 1765 if (qr_needed) { 1766 /* space to store Q */ 1767 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1768 /* first we issue queries for optimal work */ 1769 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1770 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1771 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1772 lqr_work = -1; 1773 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1774 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1775 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1776 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1777 lgqr_work = -1; 1778 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1779 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1780 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1781 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1782 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1783 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1784 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1785 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1786 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1787 /* array to store scaling factors for reflectors */ 1788 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1789 /* array to store rhs and solution of triangular solver */ 1790 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1791 /* allocating workspace for check */ 1792 if (pcbddc->dbg_flag) { 1793 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1794 } 1795 } 1796 /* array to store whether a node is primal or not */ 1797 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1798 for (i=0;i<total_primal_vertices;i++) { 1799 ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr); 1800 } 1801 1802 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1803 /* -> using implicit ordering contained in temp_indices data */ 1804 total_counts = pcbddc->n_vertices; 1805 primal_counter = total_counts; 1806 while (total_counts<pcbddc->local_primal_size) { 1807 primal_dofs = 1; 1808 if (PetscBTLookup(change_basis,total_counts)) { 1809 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1810 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1811 primal_dofs++; 1812 } 1813 /* get constraint info */ 1814 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1815 dual_dofs = size_of_constraint-primal_dofs; 1816 1817 if (pcbddc->dbg_flag) { 1818 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1819 } 1820 1821 if (primal_dofs > 1) { /* QR */ 1822 1823 /* copy quadrature constraints for change of basis check */ 1824 if (pcbddc->dbg_flag) { 1825 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1826 } 1827 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1828 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1829 1830 /* compute QR decomposition of constraints */ 1831 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1832 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1833 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1834 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1835 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1836 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1837 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1838 1839 /* explictly compute R^-T */ 1840 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1841 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1842 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1843 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1844 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1845 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1846 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1847 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1848 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1849 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1850 1851 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1854 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1856 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1857 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1858 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1859 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1860 1861 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1862 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1863 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1864 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1865 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1866 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1868 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1870 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1871 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1872 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1873 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1874 1875 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1876 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1877 /* insert cols for primal dofs */ 1878 for (j=0;j<primal_dofs;j++) { 1879 start_vals = &qr_basis[j*size_of_constraint]; 1880 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1881 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1882 } 1883 /* insert cols for dual dofs */ 1884 for (j=0,k=0;j<dual_dofs;k++) { 1885 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1886 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1887 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1888 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1889 j++; 1890 } 1891 } 1892 1893 /* check change of basis */ 1894 if (pcbddc->dbg_flag) { 1895 PetscInt ii,jj; 1896 PetscBool valid_qr=PETSC_TRUE; 1897 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 1898 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1899 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 1900 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1901 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 1902 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 1903 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1904 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 1905 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1906 for (jj=0;jj<size_of_constraint;jj++) { 1907 for (ii=0;ii<primal_dofs;ii++) { 1908 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 1909 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 1910 } 1911 } 1912 if (!valid_qr) { 1913 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1914 for (jj=0;jj<size_of_constraint;jj++) { 1915 for (ii=0;ii<primal_dofs;ii++) { 1916 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 1917 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1918 } 1919 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 1920 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 1921 } 1922 } 1923 } 1924 } else { 1925 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 1926 } 1927 } 1928 } else { /* simple transformation block */ 1929 PetscInt row,col; 1930 PetscScalar val; 1931 for (j=0;j<size_of_constraint;j++) { 1932 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 1933 if (!PetscBTLookup(is_primal,row)) { 1934 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1935 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 1936 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 1937 } else { 1938 for (k=0;k<size_of_constraint;k++) { 1939 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 1940 if (row != col) { 1941 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 1942 } else { 1943 val = 1.0; 1944 } 1945 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 1946 } 1947 } 1948 } 1949 } 1950 /* increment primal counter */ 1951 primal_counter += primal_dofs; 1952 } else { 1953 if (pcbddc->dbg_flag) { 1954 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 1955 } 1956 } 1957 /* increment constraint counter total_counts */ 1958 total_counts += primal_dofs; 1959 } 1960 1961 /* free workspace */ 1962 if (qr_needed) { 1963 if (pcbddc->dbg_flag) { 1964 ierr = PetscFree(work);CHKERRQ(ierr); 1965 } 1966 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 1967 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 1968 ierr = PetscFree(qr_work);CHKERRQ(ierr); 1969 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 1970 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 1971 } 1972 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 1973 /* assembling */ 1974 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1976 /* 1977 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1978 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 1979 */ 1980 } 1981 1982 /* flush dbg viewer */ 1983 if (pcbddc->dbg_flag) { 1984 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1985 } 1986 1987 /* free workspace */ 1988 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 1989 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 1990 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 1991 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 1992 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 1993 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 1994 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 1995 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 1996 ierr = PetscFree(local_to_B);CHKERRQ(ierr); 1997 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 1998 PetscFunctionReturn(0); 1999 } 2000 2001 #undef __FUNCT__ 2002 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2003 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2004 { 2005 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2006 PC_IS *pcis = (PC_IS*)pc->data; 2007 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2008 PetscInt bs,ierr,i,vertex_size; 2009 PetscViewer viewer=pcbddc->dbg_viewer; 2010 2011 PetscFunctionBegin; 2012 /* Init local Graph struct */ 2013 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2014 2015 /* Check validity of the csr graph passed in by the user */ 2016 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2017 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2018 } 2019 2020 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2021 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2022 Mat mat_adj; 2023 const PetscInt *xadj,*adjncy; 2024 PetscBool flg_row=PETSC_TRUE; 2025 2026 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2027 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2028 if (!flg_row) { 2029 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2030 } 2031 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2032 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2033 if (!flg_row) { 2034 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2035 } 2036 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2037 } 2038 2039 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2040 vertex_size = 1; 2041 if (!pcbddc->user_provided_isfordofs) { 2042 if (!pcbddc->n_ISForDofs) { 2043 IS *custom_ISForDofs; 2044 2045 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2046 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2047 for (i=0;i<bs;i++) { 2048 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2049 } 2050 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2051 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2052 /* remove my references to IS objects */ 2053 for (i=0;i<bs;i++) { 2054 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2055 } 2056 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2057 } 2058 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2059 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2060 } 2061 2062 /* Setup of Graph */ 2063 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2064 PetscInt i,lsize,*idxs; 2065 PetscScalar *vals; 2066 2067 /* get dirichlet indices in local ordering exploiting local to global map */ 2068 ierr = ISGetLocalSize(pcbddc->DirichletBoundaries,&lsize);CHKERRQ(ierr); 2069 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2070 for (i=0;i<lsize;i++) vals[i] = 1.0; 2071 ierr = ISGetIndices(pcbddc->DirichletBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr); 2072 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2073 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2074 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 2075 ierr = ISRestoreIndices(pcbddc->DirichletBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr); 2076 ierr = PetscFree(vals);CHKERRQ(ierr); 2077 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 2078 /* now compute dirichlet set in local ordering */ 2079 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2080 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2081 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 2082 for (i=0,lsize=0;i<pcis->n;i++) { 2083 if (vals[i] == 1.0) { 2084 lsize++; 2085 } 2086 } 2087 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 2088 for (i=0,lsize=0;i<pcis->n;i++) { 2089 if (vals[i] == 1.0) { 2090 idxs[lsize++] = i; 2091 } 2092 } 2093 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 2094 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2095 } 2096 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2097 PetscInt i,lsize,*idxs; 2098 PetscScalar *vals; 2099 2100 /* get dirichlet indices in local ordering exploiting local to global map */ 2101 ierr = ISGetLocalSize(pcbddc->NeumannBoundaries,&lsize);CHKERRQ(ierr); 2102 ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2103 for (i=0;i<lsize;i++) vals[i] = 1.0; 2104 ierr = ISGetIndices(pcbddc->NeumannBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr); 2105 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2106 ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2107 ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr); 2108 ierr = ISRestoreIndices(pcbddc->NeumannBoundaries,(const PetscInt**)&idxs);CHKERRQ(ierr); 2109 ierr = PetscFree(vals);CHKERRQ(ierr); 2110 ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr); 2111 /* now compute dirichlet set in local ordering */ 2112 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2114 ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 2115 for (i=0,lsize=0;i<pcis->n;i++) { 2116 if (vals[i] == 1.0) { 2117 lsize++; 2118 } 2119 } 2120 ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr); 2121 for (i=0,lsize=0;i<pcis->n;i++) { 2122 if (vals[i] == 1.0) { 2123 idxs[lsize++] = i; 2124 } 2125 } 2126 ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr); 2127 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2128 } 2129 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2130 2131 /* Graph's connected components analysis */ 2132 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2133 2134 /* print some info to stdout */ 2135 if (pcbddc->dbg_flag) { 2136 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2137 } 2138 PetscFunctionReturn(0); 2139 } 2140 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2143 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2144 { 2145 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2146 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 n = 0; 2151 vertices = 0; 2152 if (pcbddc->ConstraintMatrix) { 2153 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2154 for (i=0;i<local_primal_size;i++) { 2155 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2156 if (size_of_constraint == 1) n++; 2157 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2158 } 2159 if (vertices_idx) { 2160 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2161 n = 0; 2162 for (i=0;i<local_primal_size;i++) { 2163 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2164 if (size_of_constraint == 1) { 2165 vertices[n++]=row_cmat_indices[0]; 2166 } 2167 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2168 } 2169 } 2170 } 2171 *n_vertices = n; 2172 if (vertices_idx) *vertices_idx = vertices; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2178 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2179 { 2180 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2181 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2182 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2183 PetscBT touched; 2184 PetscErrorCode ierr; 2185 2186 /* This function assumes that the number of local constraints per connected component 2187 is not greater than the number of nodes defined for the connected component 2188 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2189 PetscFunctionBegin; 2190 n = 0; 2191 constraints_index = 0; 2192 if (pcbddc->ConstraintMatrix) { 2193 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2194 max_size_of_constraint = 0; 2195 for (i=0;i<local_primal_size;i++) { 2196 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2197 if (size_of_constraint > 1) { 2198 n++; 2199 } 2200 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2201 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2202 } 2203 if (constraints_idx) { 2204 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2205 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2206 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2207 n = 0; 2208 for (i=0;i<local_primal_size;i++) { 2209 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2210 if (size_of_constraint > 1) { 2211 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2212 /* find first untouched local node */ 2213 j = 0; 2214 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2215 min_index = row_cmat_global_indices[j]; 2216 min_loc = j; 2217 /* search the minimum among nodes not yet touched on the connected component 2218 since there can be more than one constraint on a single cc */ 2219 for (j=1;j<size_of_constraint;j++) { 2220 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2221 min_index = row_cmat_global_indices[j]; 2222 min_loc = j; 2223 } 2224 } 2225 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2226 constraints_index[n++] = row_cmat_indices[min_loc]; 2227 } 2228 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2229 } 2230 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2231 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2232 } 2233 } 2234 *n_constraints = n; 2235 if (constraints_idx) *constraints_idx = constraints_index; 2236 PetscFunctionReturn(0); 2237 } 2238 2239 /* the next two functions has been adapted from pcis.c */ 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "PCBDDCApplySchur" 2242 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2243 { 2244 PetscErrorCode ierr; 2245 PC_IS *pcis = (PC_IS*)(pc->data); 2246 2247 PetscFunctionBegin; 2248 if (!vec2_B) { vec2_B = v; } 2249 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2250 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2251 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2252 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2253 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2259 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2260 { 2261 PetscErrorCode ierr; 2262 PC_IS *pcis = (PC_IS*)(pc->data); 2263 2264 PetscFunctionBegin; 2265 if (!vec2_B) { vec2_B = v; } 2266 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2267 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2268 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2269 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2270 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2271 PetscFunctionReturn(0); 2272 } 2273 2274 #undef __FUNCT__ 2275 #define __FUNCT__ "PCBDDCSubsetNumbering" 2276 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2277 { 2278 Vec local_vec,global_vec; 2279 IS seqis,paris; 2280 VecScatter scatter_ctx; 2281 PetscScalar *array; 2282 PetscInt *temp_global_dofs; 2283 PetscScalar globalsum; 2284 PetscInt i,j,s; 2285 PetscInt nlocals,first_index,old_index,max_local; 2286 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2287 PetscMPIInt *dof_sizes,*dof_displs; 2288 PetscBool first_found; 2289 PetscErrorCode ierr; 2290 2291 PetscFunctionBegin; 2292 /* mpi buffers */ 2293 MPI_Comm_size(comm,&size_prec_comm); 2294 MPI_Comm_rank(comm,&rank_prec_comm); 2295 j = ( !rank_prec_comm ? size_prec_comm : 0); 2296 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2297 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2298 /* get maximum size of subset */ 2299 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2300 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2301 max_local = 0; 2302 if (n_local_dofs) { 2303 max_local = temp_global_dofs[0]; 2304 for (i=1;i<n_local_dofs;i++) { 2305 if (max_local < temp_global_dofs[i] ) { 2306 max_local = temp_global_dofs[i]; 2307 } 2308 } 2309 } 2310 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2311 max_global++; 2312 max_local = 0; 2313 if (n_local_dofs) { 2314 max_local = local_dofs[0]; 2315 for (i=1;i<n_local_dofs;i++) { 2316 if (max_local < local_dofs[i] ) { 2317 max_local = local_dofs[i]; 2318 } 2319 } 2320 } 2321 max_local++; 2322 /* allocate workspace */ 2323 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2324 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2325 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2326 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2327 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2328 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2329 /* create scatter */ 2330 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2331 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2332 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2333 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2334 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2335 /* init array */ 2336 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2337 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2338 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2339 if (local_dofs_mult) { 2340 for (i=0;i<n_local_dofs;i++) { 2341 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2342 } 2343 } else { 2344 for (i=0;i<n_local_dofs;i++) { 2345 array[local_dofs[i]]=1.0; 2346 } 2347 } 2348 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2349 /* scatter into global vec and get total number of global dofs */ 2350 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2351 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2352 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2353 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2354 /* Fill global_vec with cumulative function for global numbering */ 2355 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2356 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2357 nlocals = 0; 2358 first_index = -1; 2359 first_found = PETSC_FALSE; 2360 for (i=0;i<s;i++) { 2361 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2362 first_found = PETSC_TRUE; 2363 first_index = i; 2364 } 2365 nlocals += (PetscInt)PetscRealPart(array[i]); 2366 } 2367 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2368 if (!rank_prec_comm) { 2369 dof_displs[0]=0; 2370 for (i=1;i<size_prec_comm;i++) { 2371 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2372 } 2373 } 2374 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2375 if (first_found) { 2376 array[first_index] += (PetscScalar)nlocals; 2377 old_index = first_index; 2378 for (i=first_index+1;i<s;i++) { 2379 if (PetscRealPart(array[i]) > 0.0) { 2380 array[i] += array[old_index]; 2381 old_index = i; 2382 } 2383 } 2384 } 2385 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2386 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2387 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2388 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2389 /* get global ordering of local dofs */ 2390 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2391 if (local_dofs_mult) { 2392 for (i=0;i<n_local_dofs;i++) { 2393 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2394 } 2395 } else { 2396 for (i=0;i<n_local_dofs;i++) { 2397 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2398 } 2399 } 2400 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2401 /* free workspace */ 2402 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2403 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2404 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2405 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2406 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2407 /* return pointer to global ordering of local dofs */ 2408 *global_numbering_subset = temp_global_dofs; 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2414 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2415 { 2416 PetscInt i,j; 2417 PetscScalar *alphas; 2418 PetscErrorCode ierr; 2419 2420 PetscFunctionBegin; 2421 /* this implements stabilized Gram-Schmidt */ 2422 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2423 for (i=0;i<n;i++) { 2424 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2425 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2426 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2427 } 2428 ierr = PetscFree(alphas);CHKERRQ(ierr); 2429 PetscFunctionReturn(0); 2430 } 2431 2432 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */ 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatConvert_IS_AIJ" 2435 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M) 2436 { 2437 Mat new_mat; 2438 Mat_IS *matis = (Mat_IS*)(mat->data); 2439 /* info on mat */ 2440 PetscInt bs,rows,cols; 2441 PetscInt lrows,lcols; 2442 PetscInt local_rows,local_cols; 2443 PetscMPIInt nsubdomains; 2444 /* preallocation */ 2445 Vec vec_dnz,vec_onz; 2446 PetscScalar *my_dnz,*my_onz,*array; 2447 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2448 PetscInt index_row,index_col,owner; 2449 PetscInt *local_indices,*global_indices; 2450 /* work */ 2451 PetscInt i,j; 2452 PetscErrorCode ierr; 2453 2454 PetscFunctionBegin; 2455 /* CHECKS */ 2456 /* get info from mat */ 2457 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2458 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2459 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2460 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2461 2462 /* MAT_INITIAL_MATRIX */ 2463 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2464 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2465 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2466 ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr); 2467 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2468 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2469 2470 /* 2471 preallocation 2472 */ 2473 2474 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2475 /* 2476 Some vectors are needed to sum up properly on shared interface dofs. 2477 Note that preallocation is not exact, since it overestimates nonzeros 2478 */ 2479 /* 2480 ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr); 2481 ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr); 2482 ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr); 2483 ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr); 2484 */ 2485 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2486 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2487 /* All processes need to compute entire row ownership */ 2488 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2489 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2490 for (i=0;i<nsubdomains;i++) { 2491 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2492 row_ownership[j]=i; 2493 } 2494 } 2495 /* map indices of local mat to global values */ 2496 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2497 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2498 for (i=0;i<local_rows;i++) local_indices[i]=i; 2499 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2500 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2501 2502 /* 2503 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2504 then, they will be summed up properly. This way, preallocation is always sufficient 2505 */ 2506 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2507 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2508 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2509 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2510 for (i=0;i<local_rows;i++) { 2511 index_row = global_indices[i]; 2512 for (j=i;j<local_rows;j++) { 2513 owner = row_ownership[index_row]; 2514 index_col = global_indices[j]; 2515 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2516 my_dnz[i] += 1.0; 2517 } else { /* offdiag block */ 2518 my_onz[i] += 1.0; 2519 } 2520 /* same as before, interchanging rows and cols */ 2521 if (i != j) { 2522 owner = row_ownership[index_col]; 2523 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2524 my_dnz[j] += 1.0; 2525 } else { 2526 my_onz[j] += 1.0; 2527 } 2528 } 2529 } 2530 } 2531 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2532 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2533 if (local_rows) { /* multilevel guard */ 2534 ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2535 ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2536 } 2537 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2538 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2539 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2540 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2541 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2542 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2543 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2544 2545 /* set computed preallocation in dnz and onz */ 2546 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2547 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2548 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2549 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2550 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2551 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2552 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2553 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2554 2555 /* Resize preallocation if overestimated */ 2556 for (i=0;i<lrows;i++) { 2557 dnz[i] = PetscMin(dnz[i],lcols); 2558 onz[i] = PetscMin(onz[i],cols-lcols); 2559 } 2560 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2561 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2562 2563 /* 2564 Set values. Very Basic. 2565 */ 2566 for (i=0;i<local_rows;i++) { 2567 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2568 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2569 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2570 ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2571 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2572 } 2573 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2574 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2575 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2576 2577 /* get back mat */ 2578 *M = new_mat; 2579 PetscFunctionReturn(0); 2580 } 2581 2582 #undef __FUNCT__ 2583 #define __FUNCT__ "MatISSubassemble_Private" 2584 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2585 { 2586 Mat subdomain_adj; 2587 IS new_ranks,ranks_send_to; 2588 MatPartitioning partitioner; 2589 Mat_IS *matis; 2590 PetscInt n_neighs,*neighs,*n_shared,**shared; 2591 PetscInt prank; 2592 PetscMPIInt size,rank,color; 2593 PetscInt *xadj,*adjncy,*oldranks; 2594 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2595 PetscInt i,j,n_subdomains,local_size,threshold=0; 2596 PetscErrorCode ierr; 2597 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2598 PetscSubcomm subcomm; 2599 2600 PetscFunctionBegin; 2601 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2602 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2603 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2604 2605 /* Get info on mapping */ 2606 matis = (Mat_IS*)(mat->data); 2607 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2608 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2609 2610 /* build local CSR graph of subdomains' connectivity */ 2611 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2612 xadj[0] = 0; 2613 xadj[1] = PetscMax(n_neighs-1,0); 2614 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2615 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2616 2617 if (threshold) { 2618 PetscInt* count,min_threshold; 2619 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2620 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2621 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2622 for (j=0;j<n_shared[i];j++) { 2623 count[shared[i][j]] += 1; 2624 } 2625 } 2626 /* adapt threshold since we dont want to lose significant connections */ 2627 min_threshold = n_neighs; 2628 for (i=1;i<n_neighs;i++) { 2629 for (j=0;j<n_shared[i];j++) { 2630 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2631 } 2632 } 2633 PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold); 2634 threshold = PetscMax(min_threshold+1,threshold); 2635 PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold); 2636 xadj[1] = 0; 2637 for (i=1;i<n_neighs;i++) { 2638 for (j=0;j<n_shared[i];j++) { 2639 if (count[shared[i][j]] < threshold) { 2640 adjncy[xadj[1]] = neighs[i]; 2641 adjncy_wgt[xadj[1]] = n_shared[i]; 2642 xadj[1]++; 2643 break; 2644 } 2645 } 2646 } 2647 ierr = PetscFree(count);CHKERRQ(ierr); 2648 } else { 2649 if (xadj[1]) { 2650 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2651 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2652 } 2653 } 2654 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2655 if (use_square) { 2656 for (i=0;i<xadj[1];i++) { 2657 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2658 } 2659 } 2660 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2661 2662 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2663 2664 /* 2665 Restrict work on active processes only. 2666 */ 2667 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2668 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2669 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2670 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2671 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2672 if (color) { 2673 ierr = PetscFree(xadj);CHKERRQ(ierr); 2674 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2675 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2676 } else { 2677 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2678 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2679 prank = rank; 2680 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2681 for (i=0;i<size;i++) { 2682 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2683 } 2684 for (i=0;i<xadj[1];i++) { 2685 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2686 } 2687 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2688 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2689 n_subdomains = (PetscInt)size; 2690 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2691 2692 /* Partition */ 2693 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2694 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2695 if (use_vwgt) { 2696 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2697 v_wgt[0] = local_size; 2698 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2699 } 2700 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2701 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2702 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2703 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2704 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2705 2706 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2707 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2708 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2709 /* clean up */ 2710 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2711 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2712 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2713 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2714 } 2715 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2716 2717 /* assemble parallel IS for sends */ 2718 i = 1; 2719 if (color) i=0; 2720 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2721 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2722 2723 /* get back IS */ 2724 *is_sends = ranks_send_to; 2725 PetscFunctionReturn(0); 2726 } 2727 2728 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2729 2730 #undef __FUNCT__ 2731 #define __FUNCT__ "MatISSubassemble" 2732 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2733 { 2734 Mat local_mat,new_mat; 2735 Mat_IS *matis; 2736 IS is_sends_internal; 2737 PetscInt rows,cols; 2738 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2739 PetscBool ismatis,isdense; 2740 ISLocalToGlobalMapping l2gmap; 2741 PetscInt* l2gmap_indices; 2742 const PetscInt* is_indices; 2743 MatType new_local_type; 2744 MatTypePrivate new_local_type_private; 2745 /* buffers */ 2746 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2747 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2748 /* MPI */ 2749 MPI_Comm comm; 2750 PetscMPIInt n_sends,n_recvs,commsize; 2751 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2752 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2753 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2754 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 /* checks */ 2759 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2760 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2761 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2762 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2763 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2764 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2765 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2766 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2767 PetscValidLogicalCollectiveInt(mat,bs,0); 2768 /* prepare IS for sending if not provided */ 2769 if (!is_sends) { 2770 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2771 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2772 } else { 2773 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2774 is_sends_internal = is_sends; 2775 } 2776 2777 /* get pointer of MATIS data */ 2778 matis = (Mat_IS*)mat->data; 2779 2780 /* get comm */ 2781 comm = PetscObjectComm((PetscObject)mat); 2782 2783 /* compute number of sends */ 2784 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2785 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2786 2787 /* compute number of receives */ 2788 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2789 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2790 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2791 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2792 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2793 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2794 ierr = PetscFree(iflags);CHKERRQ(ierr); 2795 2796 /* prepare send/receive buffers */ 2797 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2798 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2799 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2800 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2801 2802 /* Get data from local mat */ 2803 if (!isdense) { 2804 /* TODO: See below some guidelines on how to prepare the local buffers */ 2805 /* 2806 send_buffer_vals should contain the raw values of the local matrix 2807 send_buffer_idxs should contain: 2808 - MatType_PRIVATE type 2809 - PetscInt size_of_l2gmap 2810 - PetscInt global_row_indices[size_of_l2gmap] 2811 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2812 */ 2813 } else { 2814 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2816 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2817 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2818 send_buffer_idxs[1] = i; 2819 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2820 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2821 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2822 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2823 for (i=0;i<n_sends;i++) { 2824 ilengths_vals[is_indices[i]] = len*len; 2825 ilengths_idxs[is_indices[i]] = len+2; 2826 } 2827 } 2828 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2829 buf_size_idxs = 0; 2830 buf_size_vals = 0; 2831 for (i=0;i<n_recvs;i++) { 2832 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2833 buf_size_vals += (PetscInt)olengths_vals[i]; 2834 } 2835 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2836 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2837 2838 /* get new tags for clean communications */ 2839 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2840 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2841 2842 /* allocate for requests */ 2843 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2844 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2845 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2846 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2847 2848 /* communications */ 2849 ptr_idxs = recv_buffer_idxs; 2850 ptr_vals = recv_buffer_vals; 2851 for (i=0;i<n_recvs;i++) { 2852 source_dest = onodes[i]; 2853 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2854 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2855 ptr_idxs += olengths_idxs[i]; 2856 ptr_vals += olengths_vals[i]; 2857 } 2858 for (i=0;i<n_sends;i++) { 2859 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2860 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2861 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2862 } 2863 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2864 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2865 2866 /* assemble new l2g map */ 2867 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2868 ptr_idxs = recv_buffer_idxs; 2869 buf_size_idxs = 0; 2870 for (i=0;i<n_recvs;i++) { 2871 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2872 ptr_idxs += olengths_idxs[i]; 2873 } 2874 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2875 ptr_idxs = recv_buffer_idxs; 2876 buf_size_idxs = 0; 2877 for (i=0;i<n_recvs;i++) { 2878 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2879 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2880 ptr_idxs += olengths_idxs[i]; 2881 } 2882 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2883 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 2884 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 2885 2886 /* infer new local matrix type from received local matrices type */ 2887 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 2888 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 2889 new_local_type_private = MATAIJ_PRIVATE; 2890 new_local_type = MATSEQAIJ; 2891 if (n_recvs) { 2892 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 2893 ptr_idxs = recv_buffer_idxs; 2894 for (i=0;i<n_recvs;i++) { 2895 if ((PetscInt)new_local_type_private != *ptr_idxs) { 2896 new_local_type_private = MATAIJ_PRIVATE; 2897 break; 2898 } 2899 ptr_idxs += olengths_idxs[i]; 2900 } 2901 switch (new_local_type_private) { 2902 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 2903 new_local_type = MATSEQAIJ; 2904 bs = 1; 2905 break; 2906 case MATAIJ_PRIVATE: 2907 new_local_type = MATSEQAIJ; 2908 bs = 1; 2909 break; 2910 case MATBAIJ_PRIVATE: 2911 new_local_type = MATSEQBAIJ; 2912 break; 2913 case MATSBAIJ_PRIVATE: 2914 new_local_type = MATSEQSBAIJ; 2915 break; 2916 default: 2917 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 2918 break; 2919 } 2920 } 2921 2922 /* create MATIS object */ 2923 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2924 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 2925 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 2926 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 2927 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 2928 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 2929 2930 /* set values */ 2931 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2932 ptr_vals = recv_buffer_vals; 2933 ptr_idxs = recv_buffer_idxs; 2934 for (i=0;i<n_recvs;i++) { 2935 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 2936 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2937 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 2938 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2939 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 2940 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2941 } 2942 ptr_idxs += olengths_idxs[i]; 2943 ptr_vals += olengths_vals[i]; 2944 } 2945 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2946 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2947 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2949 2950 { /* check */ 2951 Vec lvec,rvec; 2952 PetscReal infty_error; 2953 2954 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 2955 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 2956 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 2957 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 2958 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 2959 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 2960 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 2961 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 2962 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 2963 } 2964 2965 /* free workspace */ 2966 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 2967 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 2968 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2969 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 2970 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2971 if (isdense) { 2972 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2973 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2974 } else { 2975 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 2976 } 2977 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 2978 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 2979 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 2980 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 2981 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 2982 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 2983 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 2984 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 2985 ierr = PetscFree(onodes);CHKERRQ(ierr); 2986 /* get back new mat */ 2987 *mat_n = new_mat; 2988 PetscFunctionReturn(0); 2989 } 2990 2991 #undef __FUNCT__ 2992 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 2993 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 2994 { 2995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2996 PC_IS *pcis = (PC_IS*)pc->data; 2997 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 2998 MatNullSpace CoarseNullSpace=NULL; 2999 ISLocalToGlobalMapping coarse_islg; 3000 IS coarse_is; 3001 PetscInt max_it,coarse_size,*local_primal_indices=NULL; 3002 PetscInt im_active=-1,active_procs=-1; 3003 PC pc_temp; 3004 PCType coarse_pc_type; 3005 KSPType coarse_ksp_type; 3006 PetscBool multilevel_requested,multilevel_allowed; 3007 PetscBool setsym,issym,isbddc,isnn; 3008 MatStructure matstruct; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 /* Assign global numbering to coarse dofs */ 3013 ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3014 3015 /* infer some info from user */ 3016 issym = PETSC_FALSE; 3017 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3018 multilevel_allowed = PETSC_FALSE; 3019 multilevel_requested = PETSC_FALSE; 3020 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3021 if (multilevel_requested) { 3022 /* count "active processes" */ 3023 im_active = !!(pcis->n); 3024 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3025 if (active_procs/pcbddc->coarsening_ratio < 2) { 3026 multilevel_allowed = PETSC_FALSE; 3027 } else { 3028 multilevel_allowed = PETSC_TRUE; 3029 } 3030 } 3031 3032 /* set defaults for coarse KSP and PC */ 3033 if (multilevel_allowed) { 3034 if (issym) { 3035 coarse_ksp_type = KSPRICHARDSON; 3036 } else { 3037 coarse_ksp_type = KSPCHEBYSHEV; 3038 } 3039 coarse_pc_type = PCBDDC; 3040 } else { 3041 coarse_ksp_type = KSPPREONLY; 3042 coarse_pc_type = PCREDUNDANT; 3043 } 3044 3045 /* create the coarse KSP object only once with defaults */ 3046 if (!pcbddc->coarse_ksp) { 3047 char prefix[256],str_level[3]; 3048 size_t len; 3049 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3050 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3051 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3052 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3053 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3054 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3055 /* prefix */ 3056 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3057 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3058 if (!pcbddc->current_level) { 3059 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3060 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3061 } else { 3062 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3063 if (pcbddc->current_level>1) len -= 2; 3064 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3065 *(prefix+len)='\0'; 3066 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3067 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3068 } 3069 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3070 } 3071 /* allow user customization */ 3072 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3073 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3074 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3075 3076 /* get some info after set from options */ 3077 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3078 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3079 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3080 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3081 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3082 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3083 isbddc = PETSC_FALSE; 3084 } 3085 3086 /* propagate BDDC info to the next level */ 3087 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3088 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3089 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3090 3091 /* creates temporary MATIS object for coarse matrix */ 3092 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3093 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3094 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3095 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3096 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3097 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3098 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3099 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3100 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3101 ierr = PetscFree(local_primal_indices);CHKERRQ(ierr); 3102 3103 /* assemble coarse matrix */ 3104 if (isbddc || isnn) { 3105 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3106 } else { 3107 ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3108 } 3109 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3110 3111 /* create local to global scatters for coarse problem */ 3112 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3113 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3114 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3115 3116 /* propagate symmetry info to coarse matrix */ 3117 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3118 3119 /* Compute coarse null space (special handling by BDDC only) */ 3120 if (pcbddc->NullSpace) { 3121 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3122 if (isbddc) { 3123 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3124 } else { 3125 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3126 } 3127 } 3128 3129 /* set operators */ 3130 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3131 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3132 3133 /* additional KSP customization */ 3134 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3135 if (max_it < 5) { 3136 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3137 } 3138 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3139 3140 3141 /* print some info if requested */ 3142 if (pcbddc->dbg_flag) { 3143 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3144 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3145 if (!multilevel_allowed) { 3146 if (multilevel_requested) { 3147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3148 } else if (pcbddc->max_levels) { 3149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3150 } 3151 } 3152 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3153 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3154 } 3155 3156 /* setup coarse ksp */ 3157 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3158 if (pcbddc->dbg_flag) { 3159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3161 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3163 } 3164 3165 /* Check coarse problem if requested */ 3166 if (pcbddc->dbg_flag) { 3167 KSP check_ksp; 3168 KSPType check_ksp_type; 3169 PC check_pc; 3170 Vec check_vec; 3171 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3172 PetscInt its; 3173 PetscBool ispreonly,compute; 3174 3175 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3176 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3177 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3178 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr); 3179 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3180 if (ispreonly) { 3181 check_ksp_type = KSPPREONLY; 3182 compute = PETSC_FALSE; 3183 } else { 3184 if (issym) check_ksp_type = KSPCG; 3185 else check_ksp_type = KSPGMRES; 3186 compute = PETSC_TRUE; 3187 } 3188 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3189 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3190 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3191 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3192 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3193 /* create random vec */ 3194 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3195 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3196 if (CoarseNullSpace) { 3197 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3198 } 3199 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3200 /* solve coarse problem */ 3201 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3202 if (CoarseNullSpace) { 3203 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3204 } 3205 /* check coarse problem residual error */ 3206 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3207 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3208 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3209 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3210 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3211 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3213 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3214 /* get eigenvalue estimation if preonly has not been requested */ 3215 if (!ispreonly) { 3216 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3217 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3219 } 3220 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3221 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3222 } 3223 /* free memory */ 3224 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3225 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3226 PetscFunctionReturn(0); 3227 } 3228 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3231 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3232 { 3233 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3234 PC_IS* pcis = (PC_IS*)pc->data; 3235 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3236 PetscInt i,j,coarse_size; 3237 PetscInt *local_primal_indices,*auxlocal_primal,*aux_idx; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 /* get indices in local ordering for vertices and constraints */ 3242 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr); 3243 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr); 3244 ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr); 3245 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3246 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr); 3247 ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr); 3248 ierr = PetscFree(aux_idx);CHKERRQ(ierr); 3249 3250 /* Compute global number of coarse dofs */ 3251 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3252 3253 /* check numbering */ 3254 if (pcbddc->dbg_flag) { 3255 PetscScalar coarsesum,*array; 3256 3257 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3259 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3260 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3261 for (i=0;i<pcbddc->local_primal_size;i++) { 3262 ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3263 } 3264 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3265 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3266 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3267 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3268 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3269 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3270 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3271 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3272 for (i=0;i<pcis->n;i++) { 3273 if (array[i] == 1.0) { 3274 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3275 } 3276 } 3277 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3278 for (i=0;i<pcis->n;i++) { 3279 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3280 } 3281 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3282 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3283 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3284 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3285 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3286 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3287 if (pcbddc->dbg_flag > 1) { 3288 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3289 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3291 for (i=0;i<pcbddc->local_primal_size;i++) { 3292 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],auxlocal_primal[i]); 3293 } 3294 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3295 } 3296 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3297 } 3298 ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr); 3299 /* get back data */ 3300 *coarse_size_n = coarse_size; 3301 *local_primal_indices_n = local_primal_indices; 3302 PetscFunctionReturn(0); 3303 } 3304 3305