1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 if (pcbddc->dbg_flag) { 36 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 37 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 38 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 39 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 40 } 41 42 /* max size of subsets */ 43 mss = 0; 44 for (i=0;i<sub_schurs->n_subs;i++) { 45 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 46 PetscInt subset_size; 47 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 48 mss = PetscMax(mss,subset_size); 49 } 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 55 nmax = PetscMax(nmin,nmax); 56 if (pcbddc->adaptive_threshold > 1.0) { 57 thresh = 1.0/pcbddc->adaptive_threshold; 58 } else { 59 thresh = 1.0; 60 } 61 62 /* allocate lapack workspace */ 63 cum = cum2 = 0; 64 maxneigs = 0; 65 for (i=0;i<sub_schurs->n_subs;i++) { 66 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 67 PetscInt n,subset_size; 68 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 69 n = PetscMin(subset_size,nmax); 70 cum += subset_size*n; 71 cum2 += n; 72 maxneigs = PetscMax(maxneigs,n); 73 } 74 } 75 76 if (mss) { 77 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 78 PetscBLASInt B_itype = 1; 79 PetscBLASInt B_N = mss; 80 PetscReal zero = 0.0; 81 PetscReal eps = 0.0; /* dlamch? */ 82 83 B_lwork = -1; 84 S = NULL; 85 St = NULL; 86 eigs = NULL; 87 eigv = NULL; 88 B_iwork = NULL; 89 B_ifail = NULL; 90 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 91 #if defined(PETSC_USE_COMPLEX) 92 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 93 #else 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 95 #endif 96 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 97 ierr = PetscFPTrapPop();CHKERRQ(ierr); 98 } else { 99 /* TODO */ 100 } 101 } else { 102 lwork = 0; 103 } 104 105 nv = 0; 106 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 107 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 108 } 109 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 110 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 111 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 112 #if defined(PETSC_USE_COMPLEX) 113 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 114 #endif 115 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 116 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 117 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 118 nv+cum,&pcbddc->adaptive_constraints_idxs, 119 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 120 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 121 122 maxneigs = 0; 123 cum = cum2 = cumarray = 0; 124 if (sub_schurs->is_Ej_com) { 125 const PetscInt *idxs; 126 127 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 128 for (cum=0;cum<nv;cum++) { 129 pcbddc->adaptive_constraints_n[cum] = 1; 130 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 131 pcbddc->adaptive_constraints_ptrs[cum] = cum; 132 pcbddc->adaptive_constraints_data[cum] = 1.0; 133 } 134 cum2 = cum; 135 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 136 } 137 138 if (mss) { /* multilevel */ 139 if (pcbddc->use_deluxe_scaling) { 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 141 } else { 142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 143 } 144 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 145 } 146 147 if (pcbddc->dbg_flag) { 148 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 149 } 150 151 for (i=0;i<sub_schurs->n_subs;i++) { 152 PetscInt j,subset_size; 153 154 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 155 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 156 const PetscInt *idxs; 157 PetscScalar one = 1.0,scalar_zero = 0.0; 158 PetscReal zero=0.0; 159 PetscBLASInt B_N; 160 161 /* S should be copied since we need it for deluxe scaling */ 162 if (sub_schurs->is_hermitian) { 163 PetscInt j; 164 for (j=0;j<subset_size;j++) { 165 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 } else { 171 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 172 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 } 174 /* is always this the right matrix? */ 175 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 177 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 178 /* St = Starray+cumarray; */ 179 180 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 181 PetscBLASInt B_itype = 1; 182 PetscBLASInt B_IL = 1, B_IU; 183 PetscReal eps = -1.0; /* dlamch? */ 184 PetscInt nmin_s; 185 186 /* ask for eigenvalues lower than thresh */ 187 if (pcbddc->dbg_flag) { 188 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs); 189 } 190 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 191 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 192 #if defined(PETSC_USE_COMPLEX) 193 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 194 #else 195 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 196 #endif 197 ierr = PetscFPTrapPop();CHKERRQ(ierr); 198 if (B_ierr) { 199 if (B_ierr < 0 ) { 200 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 201 } else if (B_ierr <= B_N) { 202 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 203 } else { 204 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 205 } 206 } 207 208 if (B_neigs > nmax) { 209 if (pcbddc->dbg_flag) { 210 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 211 } 212 B_neigs = nmax; 213 } 214 215 nmin_s = PetscMin(nmin,B_N); 216 if (B_neigs < nmin_s) { 217 PetscBLASInt B_neigs2; 218 219 B_IL = B_neigs + 1; 220 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 221 if (pcbddc->dbg_flag) { 222 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 223 } 224 if (sub_schurs->is_hermitian) { 225 PetscInt j; 226 for (j=0;j<subset_size;j++) { 227 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 228 } 229 for (j=0;j<subset_size;j++) { 230 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 231 } 232 } else { 233 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 234 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 235 } 236 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 237 #if defined(PETSC_USE_COMPLEX) 238 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 239 #else 240 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 241 #endif 242 ierr = PetscFPTrapPop();CHKERRQ(ierr); 243 B_neigs += B_neigs2; 244 } 245 if (B_ierr) { 246 if (B_ierr < 0 ) { 247 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 248 } else if (B_ierr <= B_N) { 249 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 250 } else { 251 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 252 } 253 } 254 if (pcbddc->dbg_flag) { 255 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 256 for (j=0;j<B_neigs;j++) { 257 if (eigs[j] == 0.0) { 258 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 259 } else { 260 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 261 } 262 } 263 } 264 } else { 265 /* TODO */ 266 } 267 maxneigs = PetscMax(B_neigs,maxneigs); 268 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 269 270 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 272 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 273 274 if (pcbddc->dbg_flag > 1) { 275 PetscInt ii; 276 for (ii=0;ii<B_neigs;ii++) { 277 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 278 for (j=0;j<B_N;j++) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 280 } 281 } 282 } 283 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 284 for (j=0;j<B_neigs;j++) { 285 #if 0 286 { 287 PetscBLASInt Blas_N,Blas_one = 1.0; 288 PetscScalar norm; 289 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 290 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 291 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 292 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 293 } else { 294 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 295 } 296 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 297 } 298 #endif 299 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 300 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 301 cum2 += subset_size; 302 } 303 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 304 } 305 /* shift for next computation */ 306 cumarray += subset_size*subset_size; 307 } 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 } 311 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 312 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 313 314 if (mss) { 315 if (pcbddc->use_deluxe_scaling) { 316 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 317 } else { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 319 } 320 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 321 } 322 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 323 #if defined(PETSC_USE_COMPLEX) 324 ierr = PetscFree(rwork);CHKERRQ(ierr); 325 #endif 326 if (pcbddc->dbg_flag) { 327 PetscInt maxneigs_r; 328 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 329 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 330 } 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "PCBDDCSetUpSolvers" 336 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 337 { 338 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 339 PetscScalar *coarse_submat_vals; 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 /* Setup local scatters R_to_B and (optionally) R_to_D */ 344 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 345 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 346 347 /* Setup local neumann solver ksp_R */ 348 /* PCBDDCSetUpLocalScatters should be called first! */ 349 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 350 351 /* Change global null space passed in by the user if change of basis has been requested */ 352 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 353 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 354 } 355 356 /* 357 Setup local correction and local part of coarse basis. 358 Gives back the dense local part of the coarse matrix in column major ordering 359 */ 360 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 361 362 /* Compute total number of coarse nodes and setup coarse solver */ 363 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 364 365 /* free */ 366 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetCustomization" 372 PetscErrorCode PCBDDCResetCustomization(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 379 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 380 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 381 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 383 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 384 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 385 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 387 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 388 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 389 PetscFunctionReturn(0); 390 } 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "PCBDDCResetTopography" 394 PetscErrorCode PCBDDCResetTopography(PC pc) 395 { 396 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 403 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 404 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "PCBDDCResetSolvers" 410 PetscErrorCode PCBDDCResetSolvers(PC pc) 411 { 412 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 413 PetscScalar *array; 414 PetscErrorCode ierr; 415 416 PetscFunctionBegin; 417 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 418 if (pcbddc->coarse_phi_B) { 419 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 420 ierr = PetscFree(array);CHKERRQ(ierr); 421 } 422 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 424 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 425 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 426 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 427 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 428 if (pcbddc->local_auxmat2) { 429 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 430 ierr = PetscFree(array);CHKERRQ(ierr); 431 } 432 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 434 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 436 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 437 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 440 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 444 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 446 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 447 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 454 { 455 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 456 PC_IS *pcis = (PC_IS*)pc->data; 457 VecType impVecType; 458 PetscInt n_constraints,n_R,old_size; 459 PetscErrorCode ierr; 460 461 PetscFunctionBegin; 462 if (!pcbddc->ConstraintMatrix) { 463 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 464 } 465 /* get sizes */ 466 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 467 n_R = pcis->n-pcbddc->n_actual_vertices; 468 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 469 /* local work vectors (try to avoid unneeded work)*/ 470 /* R nodes */ 471 old_size = -1; 472 if (pcbddc->vec1_R) { 473 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 474 } 475 if (n_R != old_size) { 476 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 477 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 481 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 482 } 483 /* local primal dofs */ 484 old_size = -1; 485 if (pcbddc->vec1_P) { 486 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 487 } 488 if (pcbddc->local_primal_size != old_size) { 489 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 490 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 491 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 492 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 493 } 494 /* local explicit constraints */ 495 old_size = -1; 496 if (pcbddc->vec1_C) { 497 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 498 } 499 if (n_constraints && n_constraints != old_size) { 500 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 501 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 502 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 503 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 504 } 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "PCBDDCSetUpCorrection" 510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 511 { 512 PetscErrorCode ierr; 513 /* pointers to pcis and pcbddc */ 514 PC_IS* pcis = (PC_IS*)pc->data; 515 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 516 /* submatrices of local problem */ 517 Mat A_RV,A_VR,A_VV; 518 /* submatrices of local coarse problem */ 519 Mat S_VV,S_CV,S_VC,S_CC; 520 /* working matrices */ 521 Mat C_CR; 522 /* additional working stuff */ 523 PC pc_R; 524 Mat F; 525 PetscBool isLU,isCHOL,isILU; 526 527 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 528 PetscScalar *work; 529 PetscInt *idx_V_B; 530 PetscInt n,n_vertices,n_constraints; 531 PetscInt i,n_R,n_D,n_B; 532 PetscBool unsymmetric_check; 533 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 534 MatType impMatType; 535 /* some shortcuts to scalars */ 536 PetscScalar one=1.0,m_one=-1.0; 537 538 PetscFunctionBegin; 539 /* get number of vertices (corners plus constraints with change of basis) 540 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 541 n_vertices = pcbddc->n_actual_vertices; 542 n_constraints = pcbddc->local_primal_size-n_vertices; 543 /* Set Non-overlapping dimensions */ 544 n_B = pcis->n_B; n_D = pcis->n - n_B; 545 n_R = pcis->n-n_vertices; 546 547 /* Set types for local objects needed by BDDC precondtioner */ 548 impMatType = MATSEQDENSE; 549 550 /* vertices in boundary numbering */ 551 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 552 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 553 if (i != n_vertices) { 554 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 555 } 556 557 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 558 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 568 unsymmetric_check = PETSC_FALSE; 569 /* allocate workspace */ 570 n = 0; 571 if (n_constraints) { 572 n += n_R*n_constraints; 573 } 574 if (n_vertices) { 575 n = PetscMax(2*n_R*n_vertices,n); 576 } 577 if (!pcbddc->issym) { 578 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 579 unsymmetric_check = PETSC_TRUE; 580 } 581 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 582 583 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 584 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 585 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 586 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 588 if (isLU || isILU || isCHOL) { 589 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 590 } else { 591 F = NULL; 592 } 593 594 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 595 if (n_constraints) { 596 Mat M1,M2,M3; 597 IS is_aux; 598 /* see if we can save some allocations */ 599 if (pcbddc->local_auxmat2) { 600 PetscInt on_R,on_constraints; 601 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 602 if (on_R != n_R || on_constraints != n_constraints) { 603 PetscScalar *marray; 604 605 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 606 ierr = PetscFree(marray);CHKERRQ(ierr); 607 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 608 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 609 } 610 } 611 /* auxiliary matrices */ 612 if (!pcbddc->local_auxmat2) { 613 PetscScalar *marray; 614 615 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 617 marray += n_R*n_constraints; 618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 } 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 627 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 628 for (i=0;i<n_constraints;i++) { 629 const PetscScalar *row_cmat_values; 630 const PetscInt *row_cmat_indices; 631 PetscInt size_of_constraint,j; 632 633 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 634 for (j=0;j<size_of_constraint;j++) { 635 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 636 } 637 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 638 } 639 if (F) { 640 Mat B; 641 642 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 643 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 644 ierr = MatDestroy(&B);CHKERRQ(ierr); 645 } else { 646 PetscScalar *xarray; 647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 648 for (i=0;i<n_constraints;i++) { 649 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 650 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 651 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 652 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 653 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 654 } 655 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 656 } 657 658 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 659 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 660 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 661 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 662 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 663 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 664 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 665 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 666 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 667 ierr = MatDestroy(&M2);CHKERRQ(ierr); 668 ierr = MatDestroy(&M3);CHKERRQ(ierr); 669 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 670 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 671 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 672 ierr = MatDestroy(&M1);CHKERRQ(ierr); 673 } 674 /* Get submatrices from subdomain matrix */ 675 if (n_vertices) { 676 Mat newmat; 677 IS is_aux; 678 PetscInt ibs,mbs; 679 PetscBool issbaij; 680 681 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 682 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 683 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 684 if (ibs != mbs) { /* need to convert to SEQAIJ */ 685 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 686 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 687 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 688 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 689 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 690 } else { 691 /* this is safe */ 692 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 693 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 694 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 695 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 696 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 697 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 698 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 699 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 700 } else { 701 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 702 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 703 } 704 } 705 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 706 } 707 708 /* Matrix of coarse basis functions (local) */ 709 if (pcbddc->coarse_phi_B) { 710 PetscInt on_B,on_primal,on_D=n_D; 711 if (pcbddc->coarse_phi_D) { 712 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 713 } 714 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 715 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 716 PetscScalar *marray; 717 718 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 719 ierr = PetscFree(marray);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 724 } 725 } 726 727 if (!pcbddc->coarse_phi_B) { 728 PetscScalar *marray; 729 730 n = n_B*pcbddc->local_primal_size; 731 if (pcbddc->switch_static || pcbddc->dbg_flag) { 732 n += n_D*pcbddc->local_primal_size; 733 } 734 if (!pcbddc->issym) { 735 n *= 2; 736 } 737 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 739 n = n_B*pcbddc->local_primal_size; 740 if (pcbddc->switch_static || pcbddc->dbg_flag) { 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 742 n += n_D*pcbddc->local_primal_size; 743 } 744 if (!pcbddc->issym) { 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 n = n_B*pcbddc->local_primal_size; 748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 749 } 750 } else { 751 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 752 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 755 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 756 } 757 } 758 } 759 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 760 /* vertices */ 761 if (n_vertices) { 762 763 if (n_R) { 764 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 765 PetscBLASInt B_N,B_one = 1; 766 PetscScalar *x,*y; 767 768 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 770 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 771 if (F) { 772 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 773 } else { 774 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 775 for (i=0;i<n_vertices;i++) { 776 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 777 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 778 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 779 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 780 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 781 } 782 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 783 } 784 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 785 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 786 if (n_constraints) { 787 Mat B; 788 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 790 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 791 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 792 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 793 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 794 ierr = MatDestroy(&B);CHKERRQ(ierr); 795 } 796 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 797 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 798 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 799 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 800 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 801 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 802 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 803 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 804 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 805 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 806 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 807 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 808 } else { 809 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 810 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 811 } 812 /* coarse basis functions */ 813 for (i=0;i<n_vertices;i++) { 814 PetscScalar *y; 815 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 817 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 818 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 819 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 y[n_B*i+idx_V_B[i]] = 1.0; 822 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 823 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 824 825 if (pcbddc->switch_static || pcbddc->dbg_flag) { 826 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 827 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 828 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 831 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 832 } 833 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 834 } 835 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 836 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 837 } 838 839 if (n_constraints) { 840 Mat B; 841 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 843 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 844 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 845 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 846 if (n_vertices) { 847 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 848 } 849 ierr = MatDestroy(&B);CHKERRQ(ierr); 850 /* coarse basis functions */ 851 for (i=0;i<n_constraints;i++) { 852 PetscScalar *y; 853 854 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 855 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 856 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 857 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 860 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 861 862 if (pcbddc->switch_static || pcbddc->dbg_flag) { 863 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 864 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 865 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 868 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 869 } 870 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 871 } 872 } 873 874 /* compute other basis functions for non-symmetric problems */ 875 if (!pcbddc->issym) { 876 Mat B,X; 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 879 880 if (n_constraints) { 881 Mat S_CCT,B_C; 882 883 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 884 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 885 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 886 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 887 if (n_vertices) { 888 Mat B_V,S_VCT; 889 890 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 891 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 892 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 893 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 894 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 895 } 896 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 897 } 898 if (n_vertices && n_R) { 899 Mat A_VRT; 900 PetscBLASInt B_N,B_one = 1; 901 902 if (!n_constraints) { /* if there are no constraints, reset work */ 903 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 904 } 905 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 906 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 907 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 908 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 909 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 910 } 911 912 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 913 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 914 for (i=0;i<pcbddc->local_primal_size;i++) { 915 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 916 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 917 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 920 } 921 } else { 922 for (i=0;i<pcbddc->local_primal_size;i++) { 923 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 924 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 925 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 927 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 928 } 929 } 930 ierr = MatDestroy(&B);CHKERRQ(ierr); 931 /* coarse basis functions */ 932 for (i=0;i<pcbddc->local_primal_size;i++) { 933 PetscScalar *y; 934 935 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 936 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 937 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 938 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 939 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 if (i<n_vertices) { 941 y[n_B*i+idx_V_B[i]] = 1.0; 942 } 943 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 944 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 945 946 if (pcbddc->switch_static || pcbddc->dbg_flag) { 947 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 948 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 949 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 952 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 953 } 954 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 955 } 956 ierr = MatDestroy(&X);CHKERRQ(ierr); 957 } 958 959 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 960 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 961 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 962 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 963 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 964 /* Checking coarse_sub_mat and coarse basis functios */ 965 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 966 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 967 if (pcbddc->dbg_flag) { 968 Mat coarse_sub_mat; 969 Mat AUXMAT,TM1,TM2,TM3,TM4; 970 Mat coarse_phi_D,coarse_phi_B; 971 Mat coarse_psi_D,coarse_psi_B; 972 Mat A_II,A_BB,A_IB,A_BI; 973 MatType checkmattype=MATSEQAIJ; 974 PetscReal real_value; 975 976 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 977 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 979 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 980 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 981 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 982 if (unsymmetric_check) { 983 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 984 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 985 } 986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 987 988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 991 if (unsymmetric_check) { 992 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 993 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 994 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 995 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 996 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 997 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 998 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 999 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1000 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1001 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1002 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1003 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1004 } else { 1005 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1006 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1007 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1008 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1009 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1010 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1011 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1012 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1013 } 1014 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1015 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1017 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1018 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1019 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 1020 1021 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1022 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1024 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1025 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1026 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1027 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1028 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1029 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1030 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1031 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1032 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1033 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1034 if (unsymmetric_check) { 1035 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1036 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1037 } 1038 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1039 } 1040 1041 /* free memory */ 1042 ierr = PetscFree(work);CHKERRQ(ierr); 1043 if (n_vertices) { 1044 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1045 } 1046 if (n_constraints) { 1047 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1048 } 1049 /* get back data */ 1050 *coarse_submat_vals_n = coarse_submat_vals; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1056 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1057 { 1058 Mat *work_mat; 1059 IS isrow_s,iscol_s; 1060 PetscBool rsorted,csorted; 1061 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1066 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1067 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1068 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1069 1070 if (!rsorted) { 1071 const PetscInt *idxs; 1072 PetscInt *idxs_sorted,i; 1073 1074 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1076 for (i=0;i<rsize;i++) { 1077 idxs_perm_r[i] = i; 1078 } 1079 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1080 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1081 for (i=0;i<rsize;i++) { 1082 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1083 } 1084 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1085 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1086 } else { 1087 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1088 isrow_s = isrow; 1089 } 1090 1091 if (!csorted) { 1092 if (isrow == iscol) { 1093 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1094 iscol_s = isrow_s; 1095 } else { 1096 const PetscInt *idxs; 1097 PetscInt *idxs_sorted,i; 1098 1099 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1100 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1101 for (i=0;i<csize;i++) { 1102 idxs_perm_c[i] = i; 1103 } 1104 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1105 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1106 for (i=0;i<csize;i++) { 1107 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1108 } 1109 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1110 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1111 } 1112 } else { 1113 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1114 iscol_s = iscol; 1115 } 1116 1117 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1118 1119 if (!rsorted || !csorted) { 1120 Mat new_mat; 1121 IS is_perm_r,is_perm_c; 1122 1123 if (!rsorted) { 1124 PetscInt *idxs_r,i; 1125 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1126 for (i=0;i<rsize;i++) { 1127 idxs_r[idxs_perm_r[i]] = i; 1128 } 1129 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1131 } else { 1132 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1133 } 1134 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1135 1136 if (!csorted) { 1137 if (isrow_s == iscol_s) { 1138 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1139 is_perm_c = is_perm_r; 1140 } else { 1141 PetscInt *idxs_c,i; 1142 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1143 for (i=0;i<csize;i++) { 1144 idxs_c[idxs_perm_c[i]] = i; 1145 } 1146 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1147 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1148 } 1149 } else { 1150 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1151 } 1152 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1153 1154 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1155 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1156 work_mat[0] = new_mat; 1157 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1158 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1159 } 1160 1161 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1162 *B = work_mat[0]; 1163 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1164 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1165 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1171 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1172 { 1173 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1175 Mat new_mat; 1176 IS is_local,is_global; 1177 PetscInt local_size; 1178 PetscBool isseqaij; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1183 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1184 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1185 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1186 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1187 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1188 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1189 1190 /* check */ 1191 if (pcbddc->dbg_flag) { 1192 Vec x,x_change; 1193 PetscReal error; 1194 1195 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1196 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1197 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1198 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1201 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1204 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1207 ierr = VecDestroy(&x);CHKERRQ(ierr); 1208 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1209 } 1210 1211 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1212 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1213 if (isseqaij) { 1214 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1215 } else { 1216 Mat work_mat; 1217 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1218 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1219 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1220 } 1221 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1222 /* 1223 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1224 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1225 */ 1226 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1232 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1233 { 1234 PC_IS* pcis = (PC_IS*)(pc->data); 1235 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1236 IS is_aux1,is_aux2; 1237 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1238 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1239 PetscInt vbs,bs; 1240 PetscBT bitmask; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 /* 1245 No need to setup local scatters if 1246 - primal space is unchanged 1247 AND 1248 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1249 AND 1250 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1251 */ 1252 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1253 PetscFunctionReturn(0); 1254 } 1255 /* destroy old objects */ 1256 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1257 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1258 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1259 /* Set Non-overlapping dimensions */ 1260 n_B = pcis->n_B; n_D = pcis->n - n_B; 1261 n_vertices = pcbddc->n_actual_vertices; 1262 /* create auxiliary bitmask */ 1263 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1264 for (i=0;i<n_vertices;i++) { 1265 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1266 } 1267 1268 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1269 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1270 for (i=0, n_R=0; i<pcis->n; i++) { 1271 if (!PetscBTLookup(bitmask,i)) { 1272 idx_R_local[n_R] = i; 1273 n_R++; 1274 } 1275 } 1276 1277 /* Block code */ 1278 vbs = 1; 1279 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1280 if (bs>1 && !(n_vertices%bs)) { 1281 PetscBool is_blocked = PETSC_TRUE; 1282 PetscInt *vary; 1283 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1284 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1285 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1286 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1287 for (i=0; i<n_vertices; i++) { 1288 if (vary[i]!=0 && vary[i]!=bs) { 1289 is_blocked = PETSC_FALSE; 1290 break; 1291 } 1292 } 1293 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1294 vbs = bs; 1295 for (i=0;i<n_R/vbs;i++) { 1296 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1297 } 1298 } 1299 ierr = PetscFree(vary);CHKERRQ(ierr); 1300 } 1301 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1302 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1303 1304 /* print some info if requested */ 1305 if (pcbddc->dbg_flag) { 1306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1307 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1308 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1310 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1313 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1314 } 1315 1316 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1317 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1318 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1319 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1320 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1321 for (i=0; i<n_D; i++) { 1322 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1323 } 1324 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 for (i=0, j=0; i<n_R; i++) { 1326 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1327 aux_array1[j++] = i; 1328 } 1329 } 1330 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1331 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1332 for (i=0, j=0; i<n_B; i++) { 1333 if (!PetscBTLookup(bitmask,is_indices[i])) { 1334 aux_array2[j++] = i; 1335 } 1336 } 1337 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1338 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1339 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1340 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1341 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1342 1343 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1344 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1345 for (i=0, j=0; i<n_R; i++) { 1346 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1347 aux_array1[j++] = i; 1348 } 1349 } 1350 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1351 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1352 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1353 } 1354 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1355 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1362 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1363 { 1364 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1365 PC_IS *pcis = (PC_IS*)pc->data; 1366 PC pc_temp; 1367 Mat A_RR; 1368 MatReuse reuse; 1369 PetscScalar m_one = -1.0; 1370 PetscReal value; 1371 PetscInt n_D,n_R,ibs,mbs; 1372 PetscBool use_exact,use_exact_reduced,issbaij; 1373 PetscErrorCode ierr; 1374 /* prefixes stuff */ 1375 char dir_prefix[256],neu_prefix[256],str_level[16]; 1376 size_t len; 1377 1378 PetscFunctionBegin; 1379 1380 /* compute prefixes */ 1381 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1382 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1383 if (!pcbddc->current_level) { 1384 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1385 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1386 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1387 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1388 } else { 1389 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1390 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1391 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1392 len -= 15; /* remove "pc_bddc_coarse_" */ 1393 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1394 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1395 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1396 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1397 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1398 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1399 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1400 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1401 } 1402 1403 /* DIRICHLET PROBLEM */ 1404 if (dirichlet) { 1405 /* Matrix for Dirichlet problem is pcis->A_II */ 1406 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1407 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1408 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1409 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1410 /* default */ 1411 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1412 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1414 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1415 if (issbaij) { 1416 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1417 } else { 1418 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1419 } 1420 /* Allow user's customization */ 1421 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1422 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1423 } 1424 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1425 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1426 if (!n_D) { 1427 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1428 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1429 } 1430 /* Set Up KSP for Dirichlet problem of BDDC */ 1431 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1432 /* set ksp_D into pcis data */ 1433 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1434 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1435 pcis->ksp_D = pcbddc->ksp_D; 1436 } 1437 1438 /* NEUMANN PROBLEM */ 1439 A_RR = 0; 1440 if (neumann) { 1441 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1442 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1443 if (pcbddc->ksp_R) { /* already created ksp */ 1444 PetscInt nn_R; 1445 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1446 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1447 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1448 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1449 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1450 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1451 reuse = MAT_INITIAL_MATRIX; 1452 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1453 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1454 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1455 reuse = MAT_INITIAL_MATRIX; 1456 } else { /* safe to reuse the matrix */ 1457 reuse = MAT_REUSE_MATRIX; 1458 } 1459 } 1460 /* last check */ 1461 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1462 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1463 reuse = MAT_INITIAL_MATRIX; 1464 } 1465 } else { /* first time, so we need to create the matrix */ 1466 reuse = MAT_INITIAL_MATRIX; 1467 } 1468 /* extract A_RR */ 1469 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1470 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1471 if (ibs != mbs) { 1472 Mat newmat; 1473 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1474 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1475 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1476 } else { 1477 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1478 } 1479 if (!pcbddc->ksp_R) { /* create object if not present */ 1480 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1481 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1482 /* default */ 1483 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1484 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1485 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1486 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1487 if (issbaij) { 1488 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1489 } else { 1490 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1491 } 1492 /* Allow user's customization */ 1493 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1494 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1495 } 1496 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1497 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1498 if (!n_R) { 1499 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1500 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1501 } 1502 /* Set Up KSP for Neumann problem of BDDC */ 1503 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1504 } 1505 1506 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1507 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1508 if (pcbddc->dbg_flag) { 1509 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1510 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1511 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1512 } 1513 if (dirichlet) { /* Dirichlet */ 1514 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1515 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1516 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1517 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1518 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1519 /* need to be adapted? */ 1520 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1521 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1522 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1523 /* print info */ 1524 if (pcbddc->dbg_flag) { 1525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1527 } 1528 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1529 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1530 } 1531 } 1532 if (neumann) { /* Neumann */ 1533 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1534 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1535 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1536 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1537 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1538 /* need to be adapted? */ 1539 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1540 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1541 /* print info */ 1542 if (pcbddc->dbg_flag) { 1543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1545 } 1546 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1547 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1548 } 1549 } 1550 } 1551 /* free Neumann problem's matrix */ 1552 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1558 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1559 { 1560 PetscErrorCode ierr; 1561 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1562 1563 PetscFunctionBegin; 1564 if (applytranspose) { 1565 if (pcbddc->local_auxmat1) { 1566 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1567 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1568 } 1569 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1570 } else { 1571 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1572 if (pcbddc->local_auxmat1) { 1573 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1574 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1575 } 1576 } 1577 PetscFunctionReturn(0); 1578 } 1579 1580 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1583 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1584 { 1585 PetscErrorCode ierr; 1586 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1587 PC_IS* pcis = (PC_IS*) (pc->data); 1588 const PetscScalar zero = 0.0; 1589 1590 PetscFunctionBegin; 1591 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1592 if (applytranspose) { 1593 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1594 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1595 } else { 1596 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1597 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1598 } 1599 /* start communications from local primal nodes to rhs of coarse solver */ 1600 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1601 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1602 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1603 1604 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1605 /* TODO remove null space when doing multilevel */ 1606 if (pcbddc->coarse_ksp) { 1607 if (applytranspose) { 1608 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1609 } else { 1610 ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr); 1611 } 1612 } 1613 1614 /* Local solution on R nodes */ 1615 if (pcis->n) { 1616 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1617 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1619 if (pcbddc->switch_static) { 1620 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1621 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1622 } 1623 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1624 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1625 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1627 if (pcbddc->switch_static) { 1628 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 } 1631 } 1632 1633 /* communications from coarse sol to local primal nodes */ 1634 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 1637 /* Sum contributions from two levels */ 1638 if (applytranspose) { 1639 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1640 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1641 } else { 1642 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1643 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1644 } 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1650 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1651 { 1652 PetscErrorCode ierr; 1653 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1654 PetscScalar *array; 1655 Vec from,to; 1656 1657 PetscFunctionBegin; 1658 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1659 from = pcbddc->coarse_vec; 1660 to = pcbddc->vec1_P; 1661 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1662 Vec tvec; 1663 1664 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1665 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1666 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1667 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1668 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1669 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1670 } 1671 } else { /* from local to global -> put data in coarse right hand side */ 1672 from = pcbddc->vec1_P; 1673 to = pcbddc->coarse_vec; 1674 } 1675 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1681 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1682 { 1683 PetscErrorCode ierr; 1684 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1685 PetscScalar *array; 1686 Vec from,to; 1687 1688 PetscFunctionBegin; 1689 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1690 from = pcbddc->coarse_vec; 1691 to = pcbddc->vec1_P; 1692 } else { /* from local to global -> put data in coarse right hand side */ 1693 from = pcbddc->vec1_P; 1694 to = pcbddc->coarse_vec; 1695 } 1696 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1697 if (smode == SCATTER_FORWARD) { 1698 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1699 Vec tvec; 1700 1701 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1702 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1703 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1704 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1705 } 1706 } else { 1707 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1708 ierr = VecResetArray(from);CHKERRQ(ierr); 1709 } 1710 } 1711 PetscFunctionReturn(0); 1712 } 1713 1714 /* uncomment for testing purposes */ 1715 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1718 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1719 { 1720 PetscErrorCode ierr; 1721 PC_IS* pcis = (PC_IS*)(pc->data); 1722 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1723 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1724 /* one and zero */ 1725 PetscScalar one=1.0,zero=0.0; 1726 /* space to store constraints and their local indices */ 1727 PetscScalar *temp_quadrature_constraint; 1728 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1729 /* iterators */ 1730 PetscInt i,j,k,total_counts,temp_start_ptr; 1731 /* BLAS integers */ 1732 PetscBLASInt lwork,lierr; 1733 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1734 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1735 /* reuse */ 1736 PetscInt olocal_primal_size; 1737 PetscInt *oprimal_indices_local_idxs; 1738 /* change of basis */ 1739 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1740 PetscBool boolforchange,qr_needed; 1741 PetscBT touched,change_basis,qr_needed_idx; 1742 /* auxiliary stuff */ 1743 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1744 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1745 PetscScalar *temp_quadrature_constraint_work=NULL; 1746 /* some quantities */ 1747 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1748 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1749 1750 PetscFunctionBegin; 1751 /* Destroy Mat objects computed previously */ 1752 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1753 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1754 1755 /* print some info */ 1756 if (pcbddc->dbg_flag) { 1757 IS vertices; 1758 PetscInt nv,nedges,nfaces; 1759 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1760 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1761 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1762 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1763 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1764 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1765 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1766 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1767 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1768 } 1769 1770 if (!pcbddc->adaptive_selection) { 1771 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1772 MatNullSpace nearnullsp; 1773 const Vec *nearnullvecs; 1774 Vec *localnearnullsp; 1775 PetscScalar *array; 1776 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1777 PetscBool nnsp_has_cnst; 1778 /* LAPACK working arrays for SVD or POD */ 1779 PetscBool skip_lapack; 1780 PetscScalar *work; 1781 PetscReal *singular_vals; 1782 #if defined(PETSC_USE_COMPLEX) 1783 PetscReal *rwork; 1784 #endif 1785 #if defined(PETSC_MISSING_LAPACK_GESVD) 1786 PetscScalar *temp_basis,*correlation_mat; 1787 #else 1788 PetscBLASInt dummy_int; 1789 PetscScalar dummy_scalar; 1790 #endif 1791 1792 /* Get index sets for faces, edges and vertices from graph */ 1793 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1794 /* free unneeded index sets */ 1795 if (!pcbddc->use_vertices) { 1796 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1797 } 1798 if (!pcbddc->use_edges) { 1799 for (i=0;i<n_ISForEdges;i++) { 1800 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1801 } 1802 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1803 n_ISForEdges = 0; 1804 } 1805 if (!pcbddc->use_faces) { 1806 for (i=0;i<n_ISForFaces;i++) { 1807 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1808 } 1809 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1810 n_ISForFaces = 0; 1811 } 1812 /* HACKS (the following two blocks of code) */ 1813 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1814 pcbddc->use_change_of_basis = PETSC_TRUE; 1815 if (!ISForEdges) { 1816 pcbddc->use_change_on_faces = PETSC_TRUE; 1817 } 1818 } 1819 if (pcbddc->NullSpace) { 1820 /* use_change_of_basis should be consistent among processors */ 1821 PetscBool tbool[2],gbool[2]; 1822 tbool [0] = pcbddc->use_change_of_basis; 1823 tbool [1] = pcbddc->use_change_on_faces; 1824 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1825 pcbddc->use_change_of_basis = gbool[0]; 1826 pcbddc->use_change_on_faces = gbool[1]; 1827 } 1828 1829 /* check if near null space is attached to global mat */ 1830 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1831 if (nearnullsp) { 1832 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1833 /* remove any stored info */ 1834 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1835 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1836 /* store information for BDDC solver reuse */ 1837 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1838 pcbddc->onearnullspace = nearnullsp; 1839 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1840 for (i=0;i<nnsp_size;i++) { 1841 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1842 } 1843 } else { /* if near null space is not provided BDDC uses constants by default */ 1844 nnsp_size = 0; 1845 nnsp_has_cnst = PETSC_TRUE; 1846 } 1847 /* get max number of constraints on a single cc */ 1848 max_constraints = nnsp_size; 1849 if (nnsp_has_cnst) max_constraints++; 1850 1851 /* 1852 Evaluate maximum storage size needed by the procedure 1853 - temp_indices will contain start index of each constraint stored as follows 1854 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1855 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1856 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1857 */ 1858 total_counts = n_ISForFaces+n_ISForEdges; 1859 total_counts *= max_constraints; 1860 n_vertices = 0; 1861 if (ISForVertices) { 1862 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1863 } 1864 total_counts += n_vertices; 1865 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1866 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1867 total_counts = 0; 1868 max_size_of_constraint = 0; 1869 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1870 if (i<n_ISForEdges) { 1871 used_IS = &ISForEdges[i]; 1872 } else { 1873 used_IS = &ISForFaces[i-n_ISForEdges]; 1874 } 1875 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1876 total_counts += j; 1877 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1878 } 1879 total_counts *= max_constraints; 1880 total_counts += n_vertices; 1881 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1882 /* get local part of global near null space vectors */ 1883 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1884 for (k=0;k<nnsp_size;k++) { 1885 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1886 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1887 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1888 } 1889 1890 /* whether or not to skip lapack calls */ 1891 skip_lapack = PETSC_TRUE; 1892 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1893 1894 /* allocate some auxiliary stuff */ 1895 if (!skip_lapack || pcbddc->use_qr_single) { 1896 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1897 } else { 1898 gidxs = NULL; 1899 permutation = NULL; 1900 temp_indices_to_constraint_work = NULL; 1901 temp_quadrature_constraint_work = NULL; 1902 } 1903 1904 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1905 if (!skip_lapack) { 1906 PetscScalar temp_work; 1907 1908 #if defined(PETSC_MISSING_LAPACK_GESVD) 1909 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1910 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1911 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1912 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1913 #if defined(PETSC_USE_COMPLEX) 1914 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1915 #endif 1916 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1917 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1918 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1919 lwork = -1; 1920 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1921 #if !defined(PETSC_USE_COMPLEX) 1922 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1923 #else 1924 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1925 #endif 1926 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1927 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1928 #else /* on missing GESVD */ 1929 /* SVD */ 1930 PetscInt max_n,min_n; 1931 max_n = max_size_of_constraint; 1932 min_n = max_constraints; 1933 if (max_size_of_constraint < max_constraints) { 1934 min_n = max_size_of_constraint; 1935 max_n = max_constraints; 1936 } 1937 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1938 #if defined(PETSC_USE_COMPLEX) 1939 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1940 #endif 1941 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1942 lwork = -1; 1943 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1944 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1945 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1947 #if !defined(PETSC_USE_COMPLEX) 1948 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1949 #else 1950 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1951 #endif 1952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1953 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1954 #endif /* on missing GESVD */ 1955 /* Allocate optimal workspace */ 1956 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1957 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1958 } 1959 /* Now we can loop on constraining sets */ 1960 total_counts = 0; 1961 temp_indices[0] = 0; 1962 /* vertices */ 1963 if (ISForVertices) { 1964 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1965 if (nnsp_has_cnst) { /* consider all vertices */ 1966 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1967 for (i=0;i<n_vertices;i++) { 1968 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1969 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1970 total_counts++; 1971 } 1972 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1973 PetscBool used_vertex; 1974 for (i=0;i<n_vertices;i++) { 1975 used_vertex = PETSC_FALSE; 1976 k = 0; 1977 while (!used_vertex && k<nnsp_size) { 1978 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1979 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1980 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1981 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1982 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1983 total_counts++; 1984 used_vertex = PETSC_TRUE; 1985 } 1986 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1987 k++; 1988 } 1989 } 1990 } 1991 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1992 n_vertices = total_counts; 1993 } 1994 1995 /* edges and faces */ 1996 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 1997 if (ncc<n_ISForEdges) { 1998 used_IS = &ISForEdges[ncc]; 1999 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2000 } else { 2001 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2002 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2003 } 2004 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2005 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2006 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2007 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2008 /* change of basis should not be performed on local periodic nodes */ 2009 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2010 if (nnsp_has_cnst) { 2011 PetscScalar quad_value; 2012 temp_constraints++; 2013 if (!pcbddc->use_nnsp_true) { 2014 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2015 } else { 2016 quad_value = 1.0; 2017 } 2018 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2019 for (j=0;j<size_of_constraint;j++) { 2020 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2021 } 2022 /* sort by global ordering if using lapack subroutines (not needed!) */ 2023 if (!skip_lapack || pcbddc->use_qr_single) { 2024 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2025 for (j=0;j<size_of_constraint;j++) { 2026 permutation[j]=j; 2027 } 2028 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2029 for (j=0;j<size_of_constraint;j++) { 2030 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2031 } 2032 for (j=0;j<size_of_constraint;j++) { 2033 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2034 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2035 } 2036 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2037 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2038 } 2039 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2040 total_counts++; 2041 } 2042 for (k=0;k<nnsp_size;k++) { 2043 PetscReal real_value; 2044 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2045 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2046 for (j=0;j<size_of_constraint;j++) { 2047 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2048 } 2049 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2050 /* check if array is null on the connected component */ 2051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2052 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2053 if (real_value > 0.0) { /* keep indices and values */ 2054 /* sort by global ordering if using lapack subroutines */ 2055 if (!skip_lapack || pcbddc->use_qr_single) { 2056 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2057 for (j=0;j<size_of_constraint;j++) { 2058 permutation[j]=j; 2059 } 2060 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2061 for (j=0;j<size_of_constraint;j++) { 2062 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2063 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2064 } 2065 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2066 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2067 } 2068 temp_constraints++; 2069 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2070 total_counts++; 2071 } 2072 } 2073 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2074 valid_constraints = temp_constraints; 2075 if (!pcbddc->use_nnsp_true && temp_constraints) { 2076 if (temp_constraints == 1) { /* just normalize the constraint */ 2077 PetscScalar norm; 2078 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2079 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2080 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2081 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2082 } else { /* perform SVD */ 2083 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2084 2085 #if defined(PETSC_MISSING_LAPACK_GESVD) 2086 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2087 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2088 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2089 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2090 from that computed using LAPACKgesvd 2091 -> This is due to a different computation of eigenvectors in LAPACKheev 2092 -> The quality of the POD-computed basis will be the same */ 2093 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2094 /* Store upper triangular part of correlation matrix */ 2095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2096 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2097 for (j=0;j<temp_constraints;j++) { 2098 for (k=0;k<j+1;k++) { 2099 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2100 } 2101 } 2102 /* compute eigenvalues and eigenvectors of correlation matrix */ 2103 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2104 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2105 #if !defined(PETSC_USE_COMPLEX) 2106 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2107 #else 2108 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2109 #endif 2110 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2111 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2112 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2113 j = 0; 2114 while (j < temp_constraints && singular_vals[j] < tol) j++; 2115 total_counts = total_counts-j; 2116 valid_constraints = temp_constraints-j; 2117 /* scale and copy POD basis into used quadrature memory */ 2118 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2119 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2120 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2122 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2124 if (j<temp_constraints) { 2125 PetscInt ii; 2126 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2127 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2128 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2130 for (k=0;k<temp_constraints-j;k++) { 2131 for (ii=0;ii<size_of_constraint;ii++) { 2132 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2133 } 2134 } 2135 } 2136 #else /* on missing GESVD */ 2137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2138 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2139 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2140 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2141 #if !defined(PETSC_USE_COMPLEX) 2142 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2143 #else 2144 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2145 #endif 2146 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2147 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2148 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2149 k = temp_constraints; 2150 if (k > size_of_constraint) k = size_of_constraint; 2151 j = 0; 2152 while (j < k && singular_vals[k-j-1] < tol) j++; 2153 valid_constraints = k-j; 2154 total_counts = total_counts-temp_constraints+valid_constraints; 2155 #endif /* on missing GESVD */ 2156 } 2157 } 2158 /* setting change_of_basis flag is safe now */ 2159 if (boolforchange) { 2160 for (j=0;j<valid_constraints;j++) { 2161 PetscBTSet(change_basis,total_counts-j-1); 2162 } 2163 } 2164 } 2165 /* free workspace */ 2166 if (!skip_lapack || pcbddc->use_qr_single) { 2167 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2168 } 2169 if (!skip_lapack) { 2170 ierr = PetscFree(work);CHKERRQ(ierr); 2171 #if defined(PETSC_USE_COMPLEX) 2172 ierr = PetscFree(rwork);CHKERRQ(ierr); 2173 #endif 2174 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2175 #if defined(PETSC_MISSING_LAPACK_GESVD) 2176 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2177 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2178 #endif 2179 } 2180 for (k=0;k<nnsp_size;k++) { 2181 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2182 } 2183 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2184 /* free index sets of faces, edges and vertices */ 2185 for (i=0;i<n_ISForFaces;i++) { 2186 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2187 } 2188 if (n_ISForFaces) { 2189 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2190 } 2191 for (i=0;i<n_ISForEdges;i++) { 2192 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2193 } 2194 if (n_ISForEdges) { 2195 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2196 } 2197 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2198 } else { 2199 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2200 PetscInt cum = 0; 2201 2202 total_counts = 0; 2203 n_vertices = 0; 2204 if (sub_schurs->is_Ej_com) { 2205 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2206 } 2207 max_constraints = 0; 2208 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2209 total_counts += pcbddc->adaptive_constraints_n[i]; 2210 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2211 } 2212 temp_indices = pcbddc->adaptive_constraints_ptrs; 2213 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2214 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2215 2216 #if 0 2217 printf("Found %d totals\n",total_counts); 2218 for (i=0;i<total_counts;i++) { 2219 printf("const %d, start %d",i,temp_indices[i]); 2220 printf(" end %d:\n",temp_indices[i+1]); 2221 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2222 printf(" idxs %d",temp_indices_to_constraint[j]); 2223 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2224 } 2225 } 2226 for (i=0;i<n_vertices;i++) { 2227 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2228 } 2229 for (i=0;i<sub_schurs->n_subs;i++) { 2230 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2231 } 2232 #endif 2233 2234 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2235 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2236 /* Change of basis */ 2237 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2238 if (pcbddc->use_change_of_basis) { 2239 cum = n_vertices; 2240 for (i=0;i<sub_schurs->n_subs;i++) { 2241 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2242 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2243 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2244 } 2245 } 2246 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2247 } 2248 } 2249 } 2250 2251 /* map temp_indices_to_constraint in boundary numbering */ 2252 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2253 if (i != temp_indices[total_counts]) { 2254 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2255 } 2256 2257 /* set quantities in pcbddc data structure and store previous primal size */ 2258 /* n_vertices defines the number of subdomain corners in the primal space */ 2259 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2260 olocal_primal_size = pcbddc->local_primal_size; 2261 pcbddc->local_primal_size = total_counts; 2262 pcbddc->n_vertices = n_vertices; 2263 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2264 2265 /* Create constraint matrix */ 2266 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2267 /* so we need to set it up properly either with or without change of basis */ 2268 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2269 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2270 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2271 /* array to compute a local numbering of constraints : vertices first then constraints */ 2272 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2273 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2274 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2275 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2276 /* auxiliary stuff for basis change */ 2277 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2278 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2279 2280 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2281 total_primal_vertices=0; 2282 for (i=0;i<pcbddc->local_primal_size;i++) { 2283 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2284 if (size_of_constraint == 1) { 2285 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2286 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2287 aux_primal_minloc[total_primal_vertices]=0; 2288 total_primal_vertices++; 2289 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2290 PetscInt min_loc,min_index; 2291 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2292 /* find first untouched local node */ 2293 k = 0; 2294 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2295 min_index = global_indices[k]; 2296 min_loc = k; 2297 /* search the minimum among global nodes already untouched on the cc */ 2298 for (k=1;k<size_of_constraint;k++) { 2299 /* there can be more than one constraint on a single connected component */ 2300 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2301 min_index = global_indices[k]; 2302 min_loc = k; 2303 } 2304 } 2305 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2306 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2307 aux_primal_minloc[total_primal_vertices]=min_loc; 2308 total_primal_vertices++; 2309 } 2310 } 2311 /* determine if a QR strategy is needed for change of basis */ 2312 qr_needed = PETSC_FALSE; 2313 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2314 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2315 if (PetscBTLookup(change_basis,i)) { 2316 if (!pcbddc->use_qr_single) { 2317 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2318 j = 0; 2319 for (k=0;k<size_of_constraint;k++) { 2320 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2321 j++; 2322 } 2323 } 2324 /* found more than one primal dof on the cc */ 2325 if (j > 1) { 2326 PetscBTSet(qr_needed_idx,i); 2327 qr_needed = PETSC_TRUE; 2328 } 2329 } else { 2330 PetscBTSet(qr_needed_idx,i); 2331 qr_needed = PETSC_TRUE; 2332 } 2333 } 2334 } 2335 /* free workspace */ 2336 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2337 2338 /* permute indices in order to have a sorted set of vertices */ 2339 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2340 2341 /* nonzero structure of constraint matrix */ 2342 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2343 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2344 j=total_primal_vertices; 2345 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2346 if (!PetscBTLookup(change_basis,i)) { 2347 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2348 j++; 2349 } 2350 } 2351 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2352 ierr = PetscFree(nnz);CHKERRQ(ierr); 2353 /* set values in constraint matrix */ 2354 for (i=0;i<total_primal_vertices;i++) { 2355 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2356 } 2357 total_counts = total_primal_vertices; 2358 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2359 if (!PetscBTLookup(change_basis,i)) { 2360 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2361 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2362 total_counts++; 2363 } 2364 } 2365 /* assembling */ 2366 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2367 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2368 /* 2369 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2370 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2371 */ 2372 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2373 if (pcbddc->use_change_of_basis) { 2374 /* dual and primal dofs on a single cc */ 2375 PetscInt dual_dofs,primal_dofs; 2376 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2377 PetscInt primal_counter; 2378 /* working stuff for GEQRF */ 2379 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2380 PetscBLASInt lqr_work; 2381 /* working stuff for UNGQR */ 2382 PetscScalar *gqr_work,lgqr_work_t; 2383 PetscBLASInt lgqr_work; 2384 /* working stuff for TRTRS */ 2385 PetscScalar *trs_rhs; 2386 PetscBLASInt Blas_NRHS; 2387 /* pointers for values insertion into change of basis matrix */ 2388 PetscInt *start_rows,*start_cols; 2389 PetscScalar *start_vals; 2390 /* working stuff for values insertion */ 2391 PetscBT is_primal; 2392 /* matrix sizes */ 2393 PetscInt global_size,local_size; 2394 /* temporary change of basis */ 2395 Mat localChangeOfBasisMatrix; 2396 /* extra space for debugging */ 2397 PetscScalar *dbg_work; 2398 2399 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2400 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2401 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2402 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2403 /* nonzeros for local mat */ 2404 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2405 for (i=0;i<pcis->n;i++) nnz[i]=1; 2406 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2407 if (PetscBTLookup(change_basis,i)) { 2408 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2409 if (PetscBTLookup(qr_needed_idx,i)) { 2410 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2411 } else { 2412 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2413 /* get local primal index on the cc */ 2414 j = 0; 2415 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2416 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2417 } 2418 } 2419 } 2420 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2421 ierr = PetscFree(nnz);CHKERRQ(ierr); 2422 /* Set initial identity in the matrix */ 2423 for (i=0;i<pcis->n;i++) { 2424 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2425 } 2426 2427 if (pcbddc->dbg_flag) { 2428 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2429 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2430 } 2431 2432 2433 /* Now we loop on the constraints which need a change of basis */ 2434 /* 2435 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2436 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2437 2438 Basic blocks of change of basis matrix T computed by 2439 2440 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2441 2442 | 1 0 ... 0 s_1/S | 2443 | 0 1 ... 0 s_2/S | 2444 | ... | 2445 | 0 ... 1 s_{n-1}/S | 2446 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2447 2448 with S = \sum_{i=1}^n s_i^2 2449 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2450 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2451 2452 - QR decomposition of constraints otherwise 2453 */ 2454 if (qr_needed) { 2455 /* space to store Q */ 2456 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2457 /* first we issue queries for optimal work */ 2458 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2459 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2460 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2461 lqr_work = -1; 2462 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2463 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2464 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2465 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2466 lgqr_work = -1; 2467 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2468 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2469 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2470 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2471 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2472 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2473 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2474 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2475 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2476 /* array to store scaling factors for reflectors */ 2477 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2478 /* array to store rhs and solution of triangular solver */ 2479 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2480 /* allocating workspace for check */ 2481 if (pcbddc->dbg_flag) { 2482 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2483 } 2484 } 2485 /* array to store whether a node is primal or not */ 2486 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2487 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2488 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2489 if (i != total_primal_vertices) { 2490 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2491 } 2492 for (i=0;i<total_primal_vertices;i++) { 2493 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2494 } 2495 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2496 2497 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2498 /* -> using implicit ordering contained in temp_indices data */ 2499 total_counts = pcbddc->n_vertices; 2500 primal_counter = total_counts; 2501 while (total_counts<pcbddc->local_primal_size) { 2502 primal_dofs = 1; 2503 if (PetscBTLookup(change_basis,total_counts)) { 2504 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2505 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2506 primal_dofs++; 2507 } 2508 /* get constraint info */ 2509 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2510 dual_dofs = size_of_constraint-primal_dofs; 2511 2512 if (pcbddc->dbg_flag) { 2513 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2514 } 2515 2516 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2517 2518 /* copy quadrature constraints for change of basis check */ 2519 if (pcbddc->dbg_flag) { 2520 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2521 } 2522 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2523 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2524 2525 /* compute QR decomposition of constraints */ 2526 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2527 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2529 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2530 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2531 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2532 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2533 2534 /* explictly compute R^-T */ 2535 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2536 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2539 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2540 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2541 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2542 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2543 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2544 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2545 2546 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2547 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2548 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2549 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2550 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2552 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2553 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2554 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2555 2556 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2557 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2558 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2560 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2565 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2566 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2567 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2568 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2569 2570 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2571 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2572 /* insert cols for primal dofs */ 2573 for (j=0;j<primal_dofs;j++) { 2574 start_vals = &qr_basis[j*size_of_constraint]; 2575 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2576 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2577 } 2578 /* insert cols for dual dofs */ 2579 for (j=0,k=0;j<dual_dofs;k++) { 2580 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2581 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2582 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2583 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2584 j++; 2585 } 2586 } 2587 2588 /* check change of basis */ 2589 if (pcbddc->dbg_flag) { 2590 PetscInt ii,jj; 2591 PetscBool valid_qr=PETSC_TRUE; 2592 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2594 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2596 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2597 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2598 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2599 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2600 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2601 for (jj=0;jj<size_of_constraint;jj++) { 2602 for (ii=0;ii<primal_dofs;ii++) { 2603 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2604 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2605 } 2606 } 2607 if (!valid_qr) { 2608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2609 for (jj=0;jj<size_of_constraint;jj++) { 2610 for (ii=0;ii<primal_dofs;ii++) { 2611 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2612 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2613 } 2614 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2615 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2616 } 2617 } 2618 } 2619 } else { 2620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2621 } 2622 } 2623 } else { /* simple transformation block */ 2624 PetscInt row,col; 2625 PetscScalar val,norm; 2626 2627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2628 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2629 for (j=0;j<size_of_constraint;j++) { 2630 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2631 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2632 if (!PetscBTLookup(is_primal,row_B)) { 2633 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2634 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2635 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2636 } else { 2637 for (k=0;k<size_of_constraint;k++) { 2638 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2639 if (row != col) { 2640 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2641 } else { 2642 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2643 } 2644 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2645 } 2646 } 2647 } 2648 if (pcbddc->dbg_flag) { 2649 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2650 } 2651 } 2652 /* increment primal counter */ 2653 primal_counter += primal_dofs; 2654 } else { 2655 if (pcbddc->dbg_flag) { 2656 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2657 } 2658 } 2659 /* increment constraint counter total_counts */ 2660 total_counts += primal_dofs; 2661 } 2662 2663 /* free workspace */ 2664 if (qr_needed) { 2665 if (pcbddc->dbg_flag) { 2666 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2667 } 2668 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2669 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2670 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2671 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2672 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2673 } 2674 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2675 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2676 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2677 2678 /* assembling of global change of variable */ 2679 { 2680 Mat tmat; 2681 PetscInt bs; 2682 2683 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2684 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2685 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2686 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2687 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2688 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2689 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2690 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2691 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2692 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2693 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2694 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2695 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2696 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2697 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2698 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2699 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2700 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2701 } 2702 /* check */ 2703 if (pcbddc->dbg_flag) { 2704 PetscReal error; 2705 Vec x,x_change; 2706 2707 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2708 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2709 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2710 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2711 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2712 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2713 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2714 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2715 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2716 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2717 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2718 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2719 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2720 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2721 ierr = VecDestroy(&x);CHKERRQ(ierr); 2722 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2723 } 2724 2725 /* adapt sub_schurs computed (if any) */ 2726 if (pcbddc->use_deluxe_scaling) { 2727 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2728 if (sub_schurs->n_subs_par_g) { 2729 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2730 } 2731 if (sub_schurs->S_Ej_all) { 2732 Mat S_1,S_2,tmat; 2733 IS is_all_N; 2734 2735 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2736 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2737 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2738 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2739 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2740 sub_schurs->S_Ej_all = S_1; 2741 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2742 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2743 sub_schurs->sum_S_Ej_all = S_2; 2744 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2745 } 2746 } 2747 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2748 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2749 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2750 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2751 } 2752 2753 /* set up change of basis context */ 2754 if (pcbddc->ChangeOfBasisMatrix) { 2755 PCBDDCChange_ctx change_ctx; 2756 2757 if (!pcbddc->new_global_mat) { 2758 PetscInt global_size,local_size; 2759 2760 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2761 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2762 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2763 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2764 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2765 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2766 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2767 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2768 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2769 } else { 2770 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2771 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2772 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2773 } 2774 if (!pcbddc->user_ChangeOfBasisMatrix) { 2775 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2776 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2777 } else { 2778 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2779 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2780 } 2781 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2782 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2783 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2784 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 } 2786 2787 /* get indices in local ordering for vertices and constraints */ 2788 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2789 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2790 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2791 } 2792 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2793 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2794 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2795 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2796 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2797 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2798 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2799 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2800 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2801 /* set quantities in PCBDDC data struct */ 2802 pcbddc->n_actual_vertices = i; 2803 /* check if a new primal space has been introduced */ 2804 pcbddc->new_primal_space_local = PETSC_TRUE; 2805 if (olocal_primal_size == pcbddc->local_primal_size) { 2806 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2807 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2808 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2809 } 2810 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2811 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2812 2813 /* flush dbg viewer */ 2814 if (pcbddc->dbg_flag) { 2815 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2816 } 2817 2818 /* free workspace */ 2819 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2820 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2821 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2822 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2823 if (!pcbddc->adaptive_selection) { 2824 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2825 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2826 } else { 2827 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2828 pcbddc->adaptive_constraints_ptrs, 2829 pcbddc->adaptive_constraints_idxs, 2830 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2831 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2832 } 2833 PetscFunctionReturn(0); 2834 } 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2838 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2839 { 2840 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2841 PC_IS *pcis = (PC_IS*)pc->data; 2842 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2843 PetscInt ierr,i,vertex_size; 2844 PetscViewer viewer=pcbddc->dbg_viewer; 2845 2846 PetscFunctionBegin; 2847 /* Reset previously computed graph */ 2848 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2849 /* Init local Graph struct */ 2850 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2851 2852 /* Check validity of the csr graph passed in by the user */ 2853 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2854 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2855 } 2856 2857 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2858 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2859 PetscInt *xadj,*adjncy; 2860 PetscInt nvtxs; 2861 2862 if (pcbddc->use_local_adj) { 2863 PetscBool flg_row=PETSC_FALSE; 2864 2865 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2866 if (flg_row) { 2867 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2868 pcbddc->computed_rowadj = PETSC_TRUE; 2869 } 2870 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2871 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2872 IS is_dummy; 2873 ISLocalToGlobalMapping l2gmap_dummy; 2874 PetscInt j,sum; 2875 PetscInt *cxadj,*cadjncy; 2876 const PetscInt *idxs; 2877 PCBDDCGraph graph; 2878 PetscBT is_on_boundary; 2879 2880 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2881 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2882 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2883 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2884 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2885 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2886 graph->xadj = xadj; 2887 graph->adjncy = adjncy; 2888 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2889 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2890 2891 if (pcbddc->dbg_flag) { 2892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2893 for (i=0;i<graph->ncc;i++) { 2894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2895 } 2896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2897 } 2898 2899 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2900 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2901 for (i=0;i<pcis->n_B;i++) { 2902 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2903 } 2904 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2905 2906 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2907 sum = 0; 2908 for (i=0;i<graph->ncc;i++) { 2909 PetscInt sizecc = 0; 2910 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2911 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2912 sizecc++; 2913 } 2914 } 2915 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2916 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2917 cxadj[graph->queue[j]] = sizecc; 2918 } 2919 } 2920 sum += sizecc*sizecc; 2921 } 2922 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2923 sum = 0; 2924 for (i=0;i<nvtxs;i++) { 2925 PetscInt temp = cxadj[i]; 2926 cxadj[i] = sum; 2927 sum += temp; 2928 } 2929 cxadj[nvtxs] = sum; 2930 for (i=0;i<graph->ncc;i++) { 2931 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2932 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2933 PetscInt k,sizecc = 0; 2934 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2935 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2936 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2937 sizecc++; 2938 } 2939 } 2940 } 2941 } 2942 } 2943 if (nvtxs) { 2944 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2945 } else { 2946 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2947 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2948 } 2949 graph->xadj = 0; 2950 graph->adjncy = 0; 2951 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2952 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2953 } 2954 } 2955 2956 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2957 vertex_size = 1; 2958 if (pcbddc->user_provided_isfordofs) { 2959 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2960 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2961 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2962 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2963 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2964 } 2965 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2966 pcbddc->n_ISForDofs = 0; 2967 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2968 } 2969 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2970 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2971 } else { 2972 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2973 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2974 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2975 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2976 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2977 } 2978 } 2979 } 2980 2981 /* Setup of Graph */ 2982 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2983 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2984 } 2985 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2986 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2987 } 2988 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2989 2990 /* Graph's connected components analysis */ 2991 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2992 2993 /* print some info to stdout */ 2994 if (pcbddc->dbg_flag) { 2995 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2996 } 2997 2998 /* mark topography has done */ 2999 pcbddc->recompute_topography = PETSC_FALSE; 3000 PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3005 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3006 { 3007 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3008 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 n = 0; 3013 vertices = 0; 3014 if (pcbddc->ConstraintMatrix) { 3015 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3016 for (i=0;i<local_primal_size;i++) { 3017 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3018 if (size_of_constraint == 1) n++; 3019 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3020 } 3021 if (vertices_idx) { 3022 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3023 n = 0; 3024 for (i=0;i<local_primal_size;i++) { 3025 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3026 if (size_of_constraint == 1) { 3027 vertices[n++]=row_cmat_indices[0]; 3028 } 3029 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3030 } 3031 } 3032 } 3033 *n_vertices = n; 3034 if (vertices_idx) *vertices_idx = vertices; 3035 PetscFunctionReturn(0); 3036 } 3037 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3040 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3041 { 3042 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3043 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3044 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3045 PetscBT touched; 3046 PetscErrorCode ierr; 3047 3048 /* This function assumes that the number of local constraints per connected component 3049 is not greater than the number of nodes defined for the connected component 3050 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3051 PetscFunctionBegin; 3052 n = 0; 3053 constraints_index = 0; 3054 if (pcbddc->ConstraintMatrix) { 3055 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3056 max_size_of_constraint = 0; 3057 for (i=0;i<local_primal_size;i++) { 3058 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3059 if (size_of_constraint > 1) { 3060 n++; 3061 } 3062 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3063 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3064 } 3065 if (constraints_idx) { 3066 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3067 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3068 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3069 n = 0; 3070 for (i=0;i<local_primal_size;i++) { 3071 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3072 if (size_of_constraint > 1) { 3073 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3074 /* find first untouched local node */ 3075 j = 0; 3076 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3077 min_index = row_cmat_global_indices[j]; 3078 min_loc = j; 3079 /* search the minimum among nodes not yet touched on the connected component 3080 since there can be more than one constraint on a single cc */ 3081 for (j=1;j<size_of_constraint;j++) { 3082 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3083 min_index = row_cmat_global_indices[j]; 3084 min_loc = j; 3085 } 3086 } 3087 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3088 constraints_index[n++] = row_cmat_indices[min_loc]; 3089 } 3090 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3091 } 3092 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3093 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3094 } 3095 } 3096 *n_constraints = n; 3097 if (constraints_idx) *constraints_idx = constraints_index; 3098 PetscFunctionReturn(0); 3099 } 3100 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "PCBDDCSubsetNumbering" 3103 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3104 { 3105 Vec local_vec,global_vec; 3106 IS seqis,paris; 3107 VecScatter scatter_ctx; 3108 PetscScalar *array; 3109 PetscInt *temp_global_dofs; 3110 PetscScalar globalsum; 3111 PetscInt i,j,s; 3112 PetscInt nlocals,first_index,old_index,max_local; 3113 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3114 PetscMPIInt *dof_sizes,*dof_displs; 3115 PetscBool first_found; 3116 PetscErrorCode ierr; 3117 3118 PetscFunctionBegin; 3119 /* mpi buffers */ 3120 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3121 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3122 j = ( !rank_prec_comm ? size_prec_comm : 0); 3123 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3124 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3125 /* get maximum size of subset */ 3126 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3127 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3128 max_local = 0; 3129 for (i=0;i<n_local_dofs;i++) { 3130 if (max_local < temp_global_dofs[i] ) { 3131 max_local = temp_global_dofs[i]; 3132 } 3133 } 3134 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3135 max_global++; 3136 max_local = 0; 3137 for (i=0;i<n_local_dofs;i++) { 3138 if (max_local < local_dofs[i] ) { 3139 max_local = local_dofs[i]; 3140 } 3141 } 3142 max_local++; 3143 /* allocate workspace */ 3144 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3145 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3146 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3147 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3148 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3149 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3150 /* create scatter */ 3151 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3152 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3153 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3154 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3155 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3156 /* init array */ 3157 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3158 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3159 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3160 if (local_dofs_mult) { 3161 for (i=0;i<n_local_dofs;i++) { 3162 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3163 } 3164 } else { 3165 for (i=0;i<n_local_dofs;i++) { 3166 array[local_dofs[i]]=1.0; 3167 } 3168 } 3169 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3170 /* scatter into global vec and get total number of global dofs */ 3171 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3172 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3173 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3174 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3175 /* Fill global_vec with cumulative function for global numbering */ 3176 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3177 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3178 nlocals = 0; 3179 first_index = -1; 3180 first_found = PETSC_FALSE; 3181 for (i=0;i<s;i++) { 3182 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3183 first_found = PETSC_TRUE; 3184 first_index = i; 3185 } 3186 nlocals += (PetscInt)PetscRealPart(array[i]); 3187 } 3188 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3189 if (!rank_prec_comm) { 3190 dof_displs[0]=0; 3191 for (i=1;i<size_prec_comm;i++) { 3192 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3193 } 3194 } 3195 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3196 if (first_found) { 3197 array[first_index] += (PetscScalar)nlocals; 3198 old_index = first_index; 3199 for (i=first_index+1;i<s;i++) { 3200 if (PetscRealPart(array[i]) > 0.1) { 3201 array[i] += array[old_index]; 3202 old_index = i; 3203 } 3204 } 3205 } 3206 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3207 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3208 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3209 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3210 /* get global ordering of local dofs */ 3211 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3212 if (local_dofs_mult) { 3213 for (i=0;i<n_local_dofs;i++) { 3214 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3215 } 3216 } else { 3217 for (i=0;i<n_local_dofs;i++) { 3218 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3219 } 3220 } 3221 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3222 /* free workspace */ 3223 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3224 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3225 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3226 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3227 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3228 /* return pointer to global ordering of local dofs */ 3229 *global_numbering_subset = temp_global_dofs; 3230 PetscFunctionReturn(0); 3231 } 3232 3233 #undef __FUNCT__ 3234 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3235 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3236 { 3237 PetscInt i,j; 3238 PetscScalar *alphas; 3239 PetscErrorCode ierr; 3240 3241 PetscFunctionBegin; 3242 /* this implements stabilized Gram-Schmidt */ 3243 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3244 for (i=0;i<n;i++) { 3245 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3246 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3247 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3248 } 3249 ierr = PetscFree(alphas);CHKERRQ(ierr); 3250 PetscFunctionReturn(0); 3251 } 3252 3253 #undef __FUNCT__ 3254 #define __FUNCT__ "MatISGetSubassemblingPattern" 3255 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3256 { 3257 Mat subdomain_adj; 3258 IS new_ranks,ranks_send_to; 3259 MatPartitioning partitioner; 3260 Mat_IS *matis; 3261 PetscInt n_neighs,*neighs,*n_shared,**shared; 3262 PetscInt prank; 3263 PetscMPIInt size,rank,color; 3264 PetscInt *xadj,*adjncy,*oldranks; 3265 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3266 PetscInt i,local_size,threshold=0; 3267 PetscErrorCode ierr; 3268 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3269 PetscSubcomm subcomm; 3270 3271 PetscFunctionBegin; 3272 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3273 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3274 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3275 3276 /* Get info on mapping */ 3277 matis = (Mat_IS*)(mat->data); 3278 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3279 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3280 3281 /* build local CSR graph of subdomains' connectivity */ 3282 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3283 xadj[0] = 0; 3284 xadj[1] = PetscMax(n_neighs-1,0); 3285 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3286 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3287 3288 if (threshold) { 3289 PetscInt xadj_count = 0; 3290 for (i=1;i<n_neighs;i++) { 3291 if (n_shared[i] > threshold) { 3292 adjncy[xadj_count] = neighs[i]; 3293 adjncy_wgt[xadj_count] = n_shared[i]; 3294 xadj_count++; 3295 } 3296 } 3297 xadj[1] = xadj_count; 3298 } else { 3299 if (xadj[1]) { 3300 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3301 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3302 } 3303 } 3304 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3305 if (use_square) { 3306 for (i=0;i<xadj[1];i++) { 3307 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3308 } 3309 } 3310 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3311 3312 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3313 3314 /* 3315 Restrict work on active processes only. 3316 */ 3317 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3318 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3319 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3320 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3321 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3322 if (color) { 3323 ierr = PetscFree(xadj);CHKERRQ(ierr); 3324 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3325 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3326 } else { 3327 PetscInt coarsening_ratio; 3328 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3329 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3330 prank = rank; 3331 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3332 /* 3333 for (i=0;i<size;i++) { 3334 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3335 } 3336 */ 3337 for (i=0;i<xadj[1];i++) { 3338 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3339 } 3340 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3341 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3342 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3343 3344 /* Partition */ 3345 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3346 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3347 if (use_vwgt) { 3348 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3349 v_wgt[0] = local_size; 3350 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3351 } 3352 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3353 coarsening_ratio = size/n_subdomains; 3354 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3355 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3356 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3357 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3358 3359 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3360 if (contiguous) { 3361 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3362 } else { 3363 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3364 } 3365 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3366 /* clean up */ 3367 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3368 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3369 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3370 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3371 } 3372 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3373 3374 /* assemble parallel IS for sends */ 3375 i = 1; 3376 if (color) i=0; 3377 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3378 3379 /* get back IS */ 3380 *is_sends = ranks_send_to; 3381 PetscFunctionReturn(0); 3382 } 3383 3384 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3385 3386 #undef __FUNCT__ 3387 #define __FUNCT__ "MatISSubassemble" 3388 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3389 { 3390 Mat local_mat; 3391 Mat_IS *matis; 3392 IS is_sends_internal; 3393 PetscInt rows,cols,new_local_rows; 3394 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3395 PetscBool ismatis,isdense,newisdense,destroy_mat; 3396 ISLocalToGlobalMapping l2gmap; 3397 PetscInt* l2gmap_indices; 3398 const PetscInt* is_indices; 3399 MatType new_local_type; 3400 /* buffers */ 3401 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3402 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3403 PetscInt *recv_buffer_idxs_local; 3404 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3405 /* MPI */ 3406 MPI_Comm comm,comm_n; 3407 PetscSubcomm subcomm; 3408 PetscMPIInt n_sends,n_recvs,commsize; 3409 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3410 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3411 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3412 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3413 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3414 PetscErrorCode ierr; 3415 3416 PetscFunctionBegin; 3417 /* TODO: add missing checks */ 3418 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3419 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3420 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3421 PetscValidLogicalCollectiveInt(mat,nis,7); 3422 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3423 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3424 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3425 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3426 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3427 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3428 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3429 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3430 PetscInt mrows,mcols,mnrows,mncols; 3431 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3432 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3433 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3434 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3435 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3436 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3437 } 3438 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3439 PetscValidLogicalCollectiveInt(mat,bs,0); 3440 /* prepare IS for sending if not provided */ 3441 if (!is_sends) { 3442 PetscBool pcontig = PETSC_TRUE; 3443 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3444 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3445 } else { 3446 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3447 is_sends_internal = is_sends; 3448 } 3449 3450 /* get pointer of MATIS data */ 3451 matis = (Mat_IS*)mat->data; 3452 3453 /* get comm */ 3454 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3455 3456 /* compute number of sends */ 3457 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3458 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3459 3460 /* compute number of receives */ 3461 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3462 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3463 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3464 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3465 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3466 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3467 ierr = PetscFree(iflags);CHKERRQ(ierr); 3468 3469 /* restrict comm if requested */ 3470 subcomm = 0; 3471 destroy_mat = PETSC_FALSE; 3472 if (restrict_comm) { 3473 PetscMPIInt color,subcommsize; 3474 3475 color = 0; 3476 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3477 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3478 subcommsize = commsize - subcommsize; 3479 /* check if reuse has been requested */ 3480 if (reuse == MAT_REUSE_MATRIX) { 3481 if (*mat_n) { 3482 PetscMPIInt subcommsize2; 3483 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3484 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3485 comm_n = PetscObjectComm((PetscObject)*mat_n); 3486 } else { 3487 comm_n = PETSC_COMM_SELF; 3488 } 3489 } else { /* MAT_INITIAL_MATRIX */ 3490 PetscMPIInt rank; 3491 3492 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3493 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3494 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3495 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3496 comm_n = PetscSubcommChild(subcomm); 3497 } 3498 /* flag to destroy *mat_n if not significative */ 3499 if (color) destroy_mat = PETSC_TRUE; 3500 } else { 3501 comm_n = comm; 3502 } 3503 3504 /* prepare send/receive buffers */ 3505 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3506 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3507 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3508 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3509 if (nis) { 3510 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3511 } 3512 3513 /* Get data from local matrices */ 3514 if (!isdense) { 3515 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3516 /* TODO: See below some guidelines on how to prepare the local buffers */ 3517 /* 3518 send_buffer_vals should contain the raw values of the local matrix 3519 send_buffer_idxs should contain: 3520 - MatType_PRIVATE type 3521 - PetscInt size_of_l2gmap 3522 - PetscInt global_row_indices[size_of_l2gmap] 3523 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3524 */ 3525 } else { 3526 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3527 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3528 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3529 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3530 send_buffer_idxs[1] = i; 3531 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3532 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3533 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3534 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3535 for (i=0;i<n_sends;i++) { 3536 ilengths_vals[is_indices[i]] = len*len; 3537 ilengths_idxs[is_indices[i]] = len+2; 3538 } 3539 } 3540 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3541 /* additional is (if any) */ 3542 if (nis) { 3543 PetscMPIInt psum; 3544 PetscInt j; 3545 for (j=0,psum=0;j<nis;j++) { 3546 PetscInt plen; 3547 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3548 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3549 psum += len+1; /* indices + lenght */ 3550 } 3551 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3552 for (j=0,psum=0;j<nis;j++) { 3553 PetscInt plen; 3554 const PetscInt *is_array_idxs; 3555 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3556 send_buffer_idxs_is[psum] = plen; 3557 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3558 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3559 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3560 psum += plen+1; /* indices + lenght */ 3561 } 3562 for (i=0;i<n_sends;i++) { 3563 ilengths_idxs_is[is_indices[i]] = psum; 3564 } 3565 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3566 } 3567 3568 buf_size_idxs = 0; 3569 buf_size_vals = 0; 3570 buf_size_idxs_is = 0; 3571 for (i=0;i<n_recvs;i++) { 3572 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3573 buf_size_vals += (PetscInt)olengths_vals[i]; 3574 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3575 } 3576 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3577 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3578 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3579 3580 /* get new tags for clean communications */ 3581 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3582 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3583 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3584 3585 /* allocate for requests */ 3586 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3587 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3588 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3589 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3590 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3591 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3592 3593 /* communications */ 3594 ptr_idxs = recv_buffer_idxs; 3595 ptr_vals = recv_buffer_vals; 3596 ptr_idxs_is = recv_buffer_idxs_is; 3597 for (i=0;i<n_recvs;i++) { 3598 source_dest = onodes[i]; 3599 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3600 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3601 ptr_idxs += olengths_idxs[i]; 3602 ptr_vals += olengths_vals[i]; 3603 if (nis) { 3604 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3605 ptr_idxs_is += olengths_idxs_is[i]; 3606 } 3607 } 3608 for (i=0;i<n_sends;i++) { 3609 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3610 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3611 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3612 if (nis) { 3613 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3614 } 3615 } 3616 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3617 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3618 3619 /* assemble new l2g map */ 3620 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3621 ptr_idxs = recv_buffer_idxs; 3622 new_local_rows = 0; 3623 for (i=0;i<n_recvs;i++) { 3624 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3625 ptr_idxs += olengths_idxs[i]; 3626 } 3627 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3628 ptr_idxs = recv_buffer_idxs; 3629 new_local_rows = 0; 3630 for (i=0;i<n_recvs;i++) { 3631 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3632 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3633 ptr_idxs += olengths_idxs[i]; 3634 } 3635 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3636 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3637 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3638 3639 /* infer new local matrix type from received local matrices type */ 3640 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3641 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3642 if (n_recvs) { 3643 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3644 ptr_idxs = recv_buffer_idxs; 3645 for (i=0;i<n_recvs;i++) { 3646 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3647 new_local_type_private = MATAIJ_PRIVATE; 3648 break; 3649 } 3650 ptr_idxs += olengths_idxs[i]; 3651 } 3652 switch (new_local_type_private) { 3653 case MATDENSE_PRIVATE: 3654 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3655 new_local_type = MATSEQAIJ; 3656 bs = 1; 3657 } else { /* if I receive only 1 dense matrix */ 3658 new_local_type = MATSEQDENSE; 3659 bs = 1; 3660 } 3661 break; 3662 case MATAIJ_PRIVATE: 3663 new_local_type = MATSEQAIJ; 3664 bs = 1; 3665 break; 3666 case MATBAIJ_PRIVATE: 3667 new_local_type = MATSEQBAIJ; 3668 break; 3669 case MATSBAIJ_PRIVATE: 3670 new_local_type = MATSEQSBAIJ; 3671 break; 3672 default: 3673 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3674 break; 3675 } 3676 } else { /* by default, new_local_type is seqdense */ 3677 new_local_type = MATSEQDENSE; 3678 bs = 1; 3679 } 3680 3681 /* create MATIS object if needed */ 3682 if (reuse == MAT_INITIAL_MATRIX) { 3683 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3684 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3685 } else { 3686 /* it also destroys the local matrices */ 3687 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3688 } 3689 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3690 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3691 3692 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3693 3694 /* Global to local map of received indices */ 3695 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3696 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3697 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3698 3699 /* restore attributes -> type of incoming data and its size */ 3700 buf_size_idxs = 0; 3701 for (i=0;i<n_recvs;i++) { 3702 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3703 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3704 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3705 } 3706 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3707 3708 /* set preallocation */ 3709 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3710 if (!newisdense) { 3711 PetscInt *new_local_nnz=0; 3712 3713 ptr_vals = recv_buffer_vals; 3714 ptr_idxs = recv_buffer_idxs_local; 3715 if (n_recvs) { 3716 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3717 } 3718 for (i=0;i<n_recvs;i++) { 3719 PetscInt j; 3720 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3721 for (j=0;j<*(ptr_idxs+1);j++) { 3722 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3723 } 3724 } else { 3725 /* TODO */ 3726 } 3727 ptr_idxs += olengths_idxs[i]; 3728 } 3729 if (new_local_nnz) { 3730 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3731 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3732 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3733 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3734 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3735 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3736 } else { 3737 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3738 } 3739 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3740 } else { 3741 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3742 } 3743 3744 /* set values */ 3745 ptr_vals = recv_buffer_vals; 3746 ptr_idxs = recv_buffer_idxs_local; 3747 for (i=0;i<n_recvs;i++) { 3748 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3749 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3750 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3751 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3752 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3753 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3754 } else { 3755 /* TODO */ 3756 } 3757 ptr_idxs += olengths_idxs[i]; 3758 ptr_vals += olengths_vals[i]; 3759 } 3760 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3761 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3762 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3763 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3765 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3766 3767 #if 0 3768 if (!restrict_comm) { /* check */ 3769 Vec lvec,rvec; 3770 PetscReal infty_error; 3771 3772 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3773 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3774 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3775 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3776 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3777 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3778 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3779 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3780 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3781 } 3782 #endif 3783 3784 /* assemble new additional is (if any) */ 3785 if (nis) { 3786 PetscInt **temp_idxs,*count_is,j,psum; 3787 3788 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3789 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3790 ptr_idxs = recv_buffer_idxs_is; 3791 psum = 0; 3792 for (i=0;i<n_recvs;i++) { 3793 for (j=0;j<nis;j++) { 3794 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3795 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3796 psum += plen; 3797 ptr_idxs += plen+1; /* shift pointer to received data */ 3798 } 3799 } 3800 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3801 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3802 for (i=1;i<nis;i++) { 3803 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3804 } 3805 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3806 ptr_idxs = recv_buffer_idxs_is; 3807 for (i=0;i<n_recvs;i++) { 3808 for (j=0;j<nis;j++) { 3809 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3810 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3811 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3812 ptr_idxs += plen+1; /* shift pointer to received data */ 3813 } 3814 } 3815 for (i=0;i<nis;i++) { 3816 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3817 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3818 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3819 } 3820 ierr = PetscFree(count_is);CHKERRQ(ierr); 3821 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3822 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3823 } 3824 /* free workspace */ 3825 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3826 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3827 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3828 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3829 if (isdense) { 3830 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3831 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3832 } else { 3833 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3834 } 3835 if (nis) { 3836 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3837 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3838 } 3839 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3840 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3841 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3842 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3843 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3844 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3845 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3846 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3847 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3848 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3849 ierr = PetscFree(onodes);CHKERRQ(ierr); 3850 if (nis) { 3851 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3852 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3853 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3854 } 3855 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3856 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3857 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3858 for (i=0;i<nis;i++) { 3859 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3860 } 3861 } 3862 PetscFunctionReturn(0); 3863 } 3864 3865 /* temporary hack into ksp private data structure */ 3866 #include <petsc-private/kspimpl.h> 3867 3868 #undef __FUNCT__ 3869 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3870 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3871 { 3872 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3873 PC_IS *pcis = (PC_IS*)pc->data; 3874 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3875 MatNullSpace CoarseNullSpace=NULL; 3876 ISLocalToGlobalMapping coarse_islg; 3877 IS coarse_is,*isarray; 3878 PetscInt i,im_active=-1,active_procs=-1; 3879 PetscInt nis,nisdofs,nisneu; 3880 PC pc_temp; 3881 PCType coarse_pc_type; 3882 KSPType coarse_ksp_type; 3883 PetscBool multilevel_requested,multilevel_allowed; 3884 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3885 Mat t_coarse_mat_is; 3886 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3887 PetscMPIInt all_procs; 3888 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3889 PetscBool compute_vecs = PETSC_FALSE; 3890 PetscScalar *array; 3891 PetscErrorCode ierr; 3892 3893 PetscFunctionBegin; 3894 /* Assign global numbering to coarse dofs */ 3895 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3896 PetscInt ocoarse_size; 3897 compute_vecs = PETSC_TRUE; 3898 ocoarse_size = pcbddc->coarse_size; 3899 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3900 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3901 /* see if we can avoid some work */ 3902 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3903 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3904 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3905 coarse_reuse = PETSC_FALSE; 3906 } else { /* we can safely reuse already computed coarse matrix */ 3907 coarse_reuse = PETSC_TRUE; 3908 } 3909 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3910 coarse_reuse = PETSC_FALSE; 3911 } 3912 /* reset any subassembling information */ 3913 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3914 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3915 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3916 coarse_reuse = PETSC_TRUE; 3917 } 3918 3919 /* count "active" (i.e. with positive local size) and "void" processes */ 3920 im_active = !!(pcis->n); 3921 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3922 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3923 void_procs = all_procs-active_procs; 3924 csin_type_simple = PETSC_TRUE; 3925 redist = PETSC_FALSE; 3926 if (pcbddc->current_level && void_procs) { 3927 csin_ml = PETSC_TRUE; 3928 ncoarse_ml = void_procs; 3929 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3930 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3931 csin_ds = PETSC_TRUE; 3932 ncoarse_ds = pcbddc->redistribute_coarse; 3933 redist = PETSC_TRUE; 3934 } else { 3935 csin_ds = PETSC_TRUE; 3936 ncoarse_ds = active_procs; 3937 redist = PETSC_TRUE; 3938 } 3939 } else { 3940 csin_ml = PETSC_FALSE; 3941 ncoarse_ml = all_procs; 3942 if (void_procs) { 3943 csin_ds = PETSC_TRUE; 3944 ncoarse_ds = void_procs; 3945 csin_type_simple = PETSC_FALSE; 3946 } else { 3947 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3948 csin_ds = PETSC_TRUE; 3949 ncoarse_ds = pcbddc->redistribute_coarse; 3950 redist = PETSC_TRUE; 3951 } else { 3952 csin_ds = PETSC_FALSE; 3953 ncoarse_ds = all_procs; 3954 } 3955 } 3956 } 3957 3958 /* 3959 test if we can go multilevel: three conditions must be satisfied: 3960 - we have not exceeded the number of levels requested 3961 - we can actually subassemble the active processes 3962 - we can find a suitable number of MPI processes where we can place the subassembled problem 3963 */ 3964 multilevel_allowed = PETSC_FALSE; 3965 multilevel_requested = PETSC_FALSE; 3966 if (pcbddc->current_level < pcbddc->max_levels) { 3967 multilevel_requested = PETSC_TRUE; 3968 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3969 multilevel_allowed = PETSC_FALSE; 3970 } else { 3971 multilevel_allowed = PETSC_TRUE; 3972 } 3973 } 3974 /* determine number of process partecipating to coarse solver */ 3975 if (multilevel_allowed) { 3976 ncoarse = ncoarse_ml; 3977 csin = csin_ml; 3978 redist = PETSC_FALSE; 3979 } else { 3980 ncoarse = ncoarse_ds; 3981 csin = csin_ds; 3982 } 3983 3984 /* creates temporary l2gmap and IS for coarse indexes */ 3985 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3986 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3987 3988 /* creates temporary MATIS object for coarse matrix */ 3989 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3990 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3991 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3992 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3993 #if 0 3994 { 3995 PetscViewer viewer; 3996 char filename[256]; 3997 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3998 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3999 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4000 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4001 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4002 } 4003 #endif 4004 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4005 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4006 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4007 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4008 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4009 4010 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4011 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4012 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4013 const PetscInt *idxs; 4014 ISLocalToGlobalMapping tmap; 4015 4016 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4017 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4018 /* allocate space for temporary storage */ 4019 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4020 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4021 /* allocate for IS array */ 4022 nisdofs = pcbddc->n_ISForDofsLocal; 4023 nisneu = !!pcbddc->NeumannBoundariesLocal; 4024 nis = nisdofs + nisneu; 4025 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4026 /* dofs splitting */ 4027 for (i=0;i<nisdofs;i++) { 4028 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4029 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4030 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4031 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4032 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4033 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4034 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4035 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4036 } 4037 /* neumann boundaries */ 4038 if (pcbddc->NeumannBoundariesLocal) { 4039 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4040 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4041 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4042 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4043 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4044 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4045 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4046 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4047 } 4048 /* free memory */ 4049 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4050 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4051 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4052 } else { 4053 nis = 0; 4054 nisdofs = 0; 4055 nisneu = 0; 4056 isarray = NULL; 4057 } 4058 /* destroy no longer needed map */ 4059 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4060 4061 /* restrict on coarse candidates (if needed) */ 4062 coarse_mat_is = NULL; 4063 if (csin) { 4064 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4065 if (redist) { 4066 PetscMPIInt rank; 4067 PetscInt spc,n_spc_p1,dest[1],destsize; 4068 4069 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4070 spc = active_procs/ncoarse; 4071 n_spc_p1 = active_procs%ncoarse; 4072 if (im_active) { 4073 destsize = 1; 4074 if (rank > n_spc_p1*(spc+1)-1) { 4075 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4076 } else { 4077 dest[0] = rank/(spc+1); 4078 } 4079 } else { 4080 destsize = 0; 4081 } 4082 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4083 } else if (csin_type_simple) { 4084 PetscMPIInt rank; 4085 PetscInt issize,isidx; 4086 4087 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4088 if (im_active) { 4089 issize = 1; 4090 isidx = (PetscInt)rank; 4091 } else { 4092 issize = 0; 4093 isidx = -1; 4094 } 4095 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4096 } else { /* get a suitable subassembling pattern from MATIS code */ 4097 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4098 } 4099 4100 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4101 if (!redist || ncoarse <= void_procs) { 4102 PetscInt ncoarse_cand,tissize,*nisindices; 4103 PetscInt *coarse_candidates; 4104 const PetscInt* tisindices; 4105 4106 /* get coarse candidates' ranks in pc communicator */ 4107 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4108 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4109 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4110 if (!coarse_candidates[i]) { 4111 coarse_candidates[ncoarse_cand++]=i; 4112 } 4113 } 4114 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4115 4116 4117 if (pcbddc->dbg_flag) { 4118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4120 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4122 for (i=0;i<ncoarse_cand;i++) { 4123 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4124 } 4125 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4126 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4127 } 4128 /* shift the pattern on coarse candidates */ 4129 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4130 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4131 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4132 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4133 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4134 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4135 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4136 } 4137 if (pcbddc->dbg_flag) { 4138 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4139 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4140 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4141 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4142 } 4143 } 4144 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4145 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4146 } else { 4147 if (pcbddc->dbg_flag) { 4148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4150 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4151 } 4152 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4153 coarse_mat_is = t_coarse_mat_is; 4154 } 4155 4156 /* create local to global scatters for coarse problem */ 4157 if (compute_vecs) { 4158 PetscInt lrows; 4159 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4160 if (coarse_mat_is) { 4161 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4162 } else { 4163 lrows = 0; 4164 } 4165 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4166 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4167 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4168 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4169 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4170 } 4171 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4172 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4173 4174 /* set defaults for coarse KSP and PC */ 4175 if (multilevel_allowed) { 4176 coarse_ksp_type = KSPRICHARDSON; 4177 coarse_pc_type = PCBDDC; 4178 } else { 4179 coarse_ksp_type = KSPPREONLY; 4180 coarse_pc_type = PCREDUNDANT; 4181 } 4182 4183 /* print some info if requested */ 4184 if (pcbddc->dbg_flag) { 4185 if (!multilevel_allowed) { 4186 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4187 if (multilevel_requested) { 4188 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4189 } else if (pcbddc->max_levels) { 4190 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4191 } 4192 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4193 } 4194 } 4195 4196 /* create the coarse KSP object only once with defaults */ 4197 if (coarse_mat_is) { 4198 MatReuse coarse_mat_reuse; 4199 PetscViewer dbg_viewer = NULL; 4200 if (pcbddc->dbg_flag) { 4201 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4202 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4203 } 4204 if (!pcbddc->coarse_ksp) { 4205 char prefix[256],str_level[16]; 4206 size_t len; 4207 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4208 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4209 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4210 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4211 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4212 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4213 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4214 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4215 /* prefix */ 4216 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4217 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4218 if (!pcbddc->current_level) { 4219 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4220 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4221 } else { 4222 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4223 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4224 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4225 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4226 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4227 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4228 } 4229 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4230 /* allow user customization */ 4231 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4232 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4233 } 4234 4235 /* get some info after set from options */ 4236 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4237 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4238 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4239 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4240 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4241 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4242 isbddc = PETSC_FALSE; 4243 } 4244 if (isredundant) { 4245 KSP inner_ksp; 4246 PC inner_pc; 4247 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4248 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4249 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4250 } 4251 4252 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4253 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4254 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4255 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4256 if (nisdofs) { 4257 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4258 for (i=0;i<nisdofs;i++) { 4259 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4260 } 4261 } 4262 if (nisneu) { 4263 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4264 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4265 } 4266 4267 /* assemble coarse matrix */ 4268 if (coarse_reuse) { 4269 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4270 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4271 coarse_mat_reuse = MAT_REUSE_MATRIX; 4272 } else { 4273 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4274 } 4275 if (isbddc || isnn) { 4276 if (pcbddc->coarsening_ratio > 1) { 4277 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4278 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4279 if (pcbddc->dbg_flag) { 4280 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4281 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4282 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4283 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4284 } 4285 } 4286 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4287 } else { 4288 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4289 coarse_mat = coarse_mat_is; 4290 } 4291 } else { 4292 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4293 } 4294 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4295 4296 /* propagate symmetry info to coarse matrix */ 4297 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4298 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4299 4300 /* set operators */ 4301 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4302 if (pcbddc->dbg_flag) { 4303 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4304 } 4305 } else { /* processes non partecipating to coarse solver (if any) */ 4306 coarse_mat = 0; 4307 } 4308 ierr = PetscFree(isarray);CHKERRQ(ierr); 4309 #if 0 4310 { 4311 PetscViewer viewer; 4312 char filename[256]; 4313 sprintf(filename,"coarse_mat.m"); 4314 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4315 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4316 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4317 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4318 } 4319 #endif 4320 4321 /* Compute coarse null space (special handling by BDDC only) */ 4322 if (pcbddc->NullSpace) { 4323 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4324 } 4325 4326 if (pcbddc->coarse_ksp) { 4327 Vec crhs,csol; 4328 PetscBool ispreonly; 4329 if (CoarseNullSpace) { 4330 if (isbddc) { 4331 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4332 } else { 4333 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4334 } 4335 } 4336 /* setup coarse ksp */ 4337 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4338 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4339 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4340 /* hack */ 4341 if (!csol) { 4342 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4343 } 4344 if (!crhs) { 4345 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4346 } 4347 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4348 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4349 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4350 KSP check_ksp; 4351 KSPType check_ksp_type; 4352 PC check_pc; 4353 Vec check_vec,coarse_vec; 4354 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4355 PetscInt its; 4356 PetscBool compute_eigs; 4357 PetscReal *eigs_r,*eigs_c; 4358 PetscInt neigs; 4359 const char *prefix; 4360 4361 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4362 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4363 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4364 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4365 if (ispreonly) { 4366 check_ksp_type = KSPPREONLY; 4367 compute_eigs = PETSC_FALSE; 4368 } else { 4369 check_ksp_type = KSPGMRES; 4370 compute_eigs = PETSC_TRUE; 4371 } 4372 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4373 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4374 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4375 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4376 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4377 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4378 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4379 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4380 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4381 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4382 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4383 /* create random vec */ 4384 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4385 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4386 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4387 if (CoarseNullSpace) { 4388 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4389 } 4390 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4391 /* solve coarse problem */ 4392 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4393 if (CoarseNullSpace) { 4394 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4395 } 4396 /* set eigenvalue estimation if preonly has not been requested */ 4397 if (compute_eigs) { 4398 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4399 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4400 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4401 lambda_max = eigs_r[neigs-1]; 4402 lambda_min = eigs_r[0]; 4403 if (pcbddc->use_coarse_estimates) { 4404 if (lambda_max>lambda_min) { 4405 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4406 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4407 } 4408 } 4409 } 4410 4411 /* check coarse problem residual error */ 4412 if (pcbddc->dbg_flag) { 4413 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4414 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4415 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4416 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4417 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4418 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4419 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4420 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4421 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4422 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4423 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4424 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4425 if (compute_eigs) { 4426 PetscReal lambda_max_s,lambda_min_s; 4427 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4428 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4429 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4430 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4431 for (i=0;i<neigs;i++) { 4432 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4433 } 4434 } 4435 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4436 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4437 } 4438 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4439 if (compute_eigs) { 4440 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4441 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4442 } 4443 } 4444 } 4445 /* print additional info */ 4446 if (pcbddc->dbg_flag) { 4447 /* waits until all processes reaches this point */ 4448 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4449 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4450 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4451 } 4452 4453 /* free memory */ 4454 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4455 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4456 PetscFunctionReturn(0); 4457 } 4458 4459 #undef __FUNCT__ 4460 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4461 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4462 { 4463 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4464 PC_IS* pcis = (PC_IS*)pc->data; 4465 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4466 PetscInt i,coarse_size; 4467 PetscInt *local_primal_indices; 4468 PetscErrorCode ierr; 4469 4470 PetscFunctionBegin; 4471 /* Compute global number of coarse dofs */ 4472 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4473 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4474 } 4475 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4476 4477 /* check numbering */ 4478 if (pcbddc->dbg_flag) { 4479 PetscScalar coarsesum,*array; 4480 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4481 4482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4483 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4484 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4485 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4486 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4487 for (i=0;i<pcbddc->local_primal_size;i++) { 4488 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4489 } 4490 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4491 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4492 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4493 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4494 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4495 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4496 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4497 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4498 for (i=0;i<pcis->n;i++) { 4499 if (array[i] == 1.0) { 4500 set_error = PETSC_TRUE; 4501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4502 } 4503 } 4504 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4505 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4506 for (i=0;i<pcis->n;i++) { 4507 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4508 } 4509 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4511 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4512 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4513 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4514 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4515 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4516 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4517 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4519 for (i=0;i<pcbddc->local_primal_size;i++) { 4520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4521 } 4522 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4523 } 4524 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4525 if (set_error_reduced) { 4526 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4527 } 4528 } 4529 /* get back data */ 4530 *coarse_size_n = coarse_size; 4531 *local_primal_indices_n = local_primal_indices; 4532 PetscFunctionReturn(0); 4533 } 4534 4535 #undef __FUNCT__ 4536 #define __FUNCT__ "PCBDDCGlobalToLocal" 4537 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4538 { 4539 IS localis_t; 4540 PetscInt i,lsize,*idxs,n; 4541 PetscScalar *vals; 4542 PetscErrorCode ierr; 4543 4544 PetscFunctionBegin; 4545 /* get indices in local ordering exploiting local to global map */ 4546 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4547 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4548 for (i=0;i<lsize;i++) vals[i] = 1.0; 4549 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4550 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4551 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4552 if (idxs) { /* multilevel guard */ 4553 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4554 } 4555 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4556 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4557 ierr = PetscFree(vals);CHKERRQ(ierr); 4558 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4559 /* now compute set in local ordering */ 4560 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4561 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4562 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4563 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4564 for (i=0,lsize=0;i<n;i++) { 4565 if (PetscRealPart(vals[i]) > 0.5) { 4566 lsize++; 4567 } 4568 } 4569 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4570 for (i=0,lsize=0;i<n;i++) { 4571 if (PetscRealPart(vals[i]) > 0.5) { 4572 idxs[lsize++] = i; 4573 } 4574 } 4575 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4576 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4577 *localis = localis_t; 4578 PetscFunctionReturn(0); 4579 } 4580 4581 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4582 #undef __FUNCT__ 4583 #define __FUNCT__ "PCBDDCMatMult_Private" 4584 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4585 { 4586 PCBDDCChange_ctx change_ctx; 4587 PetscErrorCode ierr; 4588 4589 PetscFunctionBegin; 4590 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4591 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4592 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4593 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4594 PetscFunctionReturn(0); 4595 } 4596 4597 #undef __FUNCT__ 4598 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4599 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4600 { 4601 PCBDDCChange_ctx change_ctx; 4602 PetscErrorCode ierr; 4603 4604 PetscFunctionBegin; 4605 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4606 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4607 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4608 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4609 PetscFunctionReturn(0); 4610 } 4611 4612 #undef __FUNCT__ 4613 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4614 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4615 { 4616 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4617 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4618 PetscInt *used_xadj,*used_adjncy; 4619 PetscBool free_used_adj; 4620 PetscErrorCode ierr; 4621 4622 PetscFunctionBegin; 4623 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4624 free_used_adj = PETSC_FALSE; 4625 if (pcbddc->sub_schurs_layers == -1) { 4626 used_xadj = NULL; 4627 used_adjncy = NULL; 4628 } else { 4629 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4630 used_xadj = pcbddc->mat_graph->xadj; 4631 used_adjncy = pcbddc->mat_graph->adjncy; 4632 } else if (pcbddc->computed_rowadj) { 4633 used_xadj = pcbddc->mat_graph->xadj; 4634 used_adjncy = pcbddc->mat_graph->adjncy; 4635 } else { 4636 PetscBool flg_row=PETSC_FALSE; 4637 const PetscInt *xadj,*adjncy; 4638 PetscInt nvtxs; 4639 4640 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4641 if (flg_row) { 4642 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4643 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4644 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4645 free_used_adj = PETSC_TRUE; 4646 } else { 4647 pcbddc->sub_schurs_layers = -1; 4648 used_xadj = NULL; 4649 used_adjncy = NULL; 4650 } 4651 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4652 } 4653 } 4654 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4655 4656 /* free adjacency */ 4657 if (free_used_adj) { 4658 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4659 } 4660 PetscFunctionReturn(0); 4661 } 4662 4663 #undef __FUNCT__ 4664 #define __FUNCT__ "PCBDDCInitSubSchurs" 4665 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4666 { 4667 PC_IS *pcis=(PC_IS*)pc->data; 4668 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4669 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4670 PCBDDCGraph graph; 4671 Mat S_j; 4672 PetscErrorCode ierr; 4673 4674 PetscFunctionBegin; 4675 /* attach interface graph for determining subsets */ 4676 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4677 IS verticesIS; 4678 4679 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4680 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4681 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4682 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4683 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4684 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4685 /* 4686 if (pcbddc->dbg_flag) { 4687 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4688 } 4689 */ 4690 } else { 4691 graph = pcbddc->mat_graph; 4692 } 4693 4694 /* Create Schur complement matrix */ 4695 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4696 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4697 4698 /* sub_schurs init */ 4699 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4700 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4701 /* free graph struct */ 4702 if (pcbddc->sub_schurs_rebuild) { 4703 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4704 } 4705 PetscFunctionReturn(0); 4706 } 4707