1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 60 nmax = PetscMax(nmin,nmax); 61 if (pcbddc->adaptive_threshold > 1.0) { 62 thresh = 1.0/pcbddc->adaptive_threshold; 63 } else { 64 thresh = 1.0; 65 } 66 67 /* allocate lapack workspace */ 68 cum = cum2 = 0; 69 maxneigs = 0; 70 for (i=0;i<sub_schurs->n_subs;i++) { 71 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 72 PetscInt n,subset_size; 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size*n; 76 cum2 += n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 81 if (mss) { 82 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 83 PetscBLASInt B_itype = 1; 84 PetscBLASInt B_N = mss; 85 PetscReal zero = 0.0; 86 PetscReal eps = 0.0; /* dlamch? */ 87 88 B_lwork = -1; 89 S = NULL; 90 St = NULL; 91 eigs = NULL; 92 eigv = NULL; 93 B_iwork = NULL; 94 B_ifail = NULL; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 116 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 117 #if defined(PETSC_USE_COMPLEX) 118 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 119 #endif 120 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 121 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 122 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 123 nv+cum,&pcbddc->adaptive_constraints_idxs, 124 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 125 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 126 127 maxneigs = 0; 128 cum = cum2 = cumarray = 0; 129 if (sub_schurs->is_Ej_com) { 130 const PetscInt *idxs; 131 132 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 133 for (cum=0;cum<nv;cum++) { 134 pcbddc->adaptive_constraints_n[cum] = 1; 135 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 136 pcbddc->adaptive_constraints_ptrs[cum] = cum; 137 pcbddc->adaptive_constraints_data[cum] = 1.0; 138 } 139 cum2 = cum; 140 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 141 } 142 143 if (mss) { /* multilevel */ 144 if (pcbddc->use_deluxe_scaling) { 145 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 146 } else { 147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 148 } 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 150 } 151 152 for (i=0;i<sub_schurs->n_subs;i++) { 153 PetscInt j,subset_size; 154 155 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 156 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 157 const PetscInt *idxs; 158 PetscScalar one = 1.0,scalar_zero = 0.0; 159 PetscReal zero=0.0; 160 PetscBLASInt B_N; 161 162 /* S should be copied since we need it for deluxe scaling */ 163 if (sub_schurs->is_hermitian) { 164 PetscInt j; 165 for (j=0;j<subset_size;j++) { 166 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 167 } 168 for (j=0;j<subset_size;j++) { 169 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 170 } 171 } else { 172 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 174 } 175 /* is always this the right matrix? */ 176 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 177 178 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 179 /* St = Starray+cumarray; */ 180 181 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 182 PetscBLASInt B_itype = 1; 183 PetscBLASInt B_IL = 1, B_IU; 184 PetscReal eps = -1.0; /* dlamch? */ 185 PetscInt nmin_s; 186 187 /* ask for eigenvalues lower than thresh */ 188 if (pcbddc->dbg_flag) { 189 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs); 190 } 191 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 193 #if defined(PETSC_USE_COMPLEX) 194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 195 #else 196 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 197 #endif 198 ierr = PetscFPTrapPop();CHKERRQ(ierr); 199 if (B_ierr) { 200 if (B_ierr < 0 ) { 201 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 202 } else if (B_ierr <= B_N) { 203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 204 } else { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 206 } 207 } 208 209 if (B_neigs > nmax) { 210 if (pcbddc->dbg_flag) { 211 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 212 } 213 B_neigs = nmax; 214 } 215 216 nmin_s = PetscMin(nmin,B_N); 217 if (B_neigs < nmin_s) { 218 PetscBLASInt B_neigs2; 219 220 B_IL = B_neigs + 1; 221 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 222 if (pcbddc->dbg_flag) { 223 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 224 } 225 if (sub_schurs->is_hermitian) { 226 PetscInt j; 227 for (j=0;j<subset_size;j++) { 228 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 229 } 230 for (j=0;j<subset_size;j++) { 231 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 232 } 233 } else { 234 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 235 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 236 } 237 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 238 #if defined(PETSC_USE_COMPLEX) 239 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 240 #else 241 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 242 #endif 243 ierr = PetscFPTrapPop();CHKERRQ(ierr); 244 B_neigs += B_neigs2; 245 } 246 if (B_ierr) { 247 if (B_ierr < 0 ) { 248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 249 } else if (B_ierr <= B_N) { 250 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 251 } else { 252 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 253 } 254 } 255 if (pcbddc->dbg_flag) { 256 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 257 for (j=0;j<B_neigs;j++) { 258 if (eigs[j] == 0.0) { 259 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 260 } else { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 262 } 263 } 264 } 265 } else { 266 /* TODO */ 267 } 268 maxneigs = PetscMax(B_neigs,maxneigs); 269 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 270 271 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 272 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 273 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 274 275 if (pcbddc->dbg_flag > 1) { 276 PetscInt ii; 277 for (ii=0;ii<B_neigs;ii++) { 278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 279 for (j=0;j<B_N;j++) { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 281 } 282 } 283 } 284 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 285 for (j=0;j<B_neigs;j++) { 286 #if 0 287 { 288 PetscBLASInt Blas_N,Blas_one = 1.0; 289 PetscScalar norm; 290 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 291 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 292 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 293 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 294 } else { 295 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 296 } 297 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 298 } 299 #endif 300 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 301 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 302 cum2 += subset_size; 303 } 304 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 305 } 306 /* shift for next computation */ 307 cumarray += subset_size*subset_size; 308 } 309 if (pcbddc->dbg_flag) { 310 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 311 } 312 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 313 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 314 315 if (mss) { 316 if (pcbddc->use_deluxe_scaling) { 317 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 318 } else { 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 320 } 321 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 322 } 323 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 324 #if defined(PETSC_USE_COMPLEX) 325 ierr = PetscFree(rwork);CHKERRQ(ierr); 326 #endif 327 if (pcbddc->dbg_flag) { 328 PetscInt maxneigs_r; 329 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 330 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 331 } 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "PCBDDCSetUpSolvers" 337 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 338 { 339 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 340 PetscScalar *coarse_submat_vals; 341 PetscErrorCode ierr; 342 343 PetscFunctionBegin; 344 /* Setup local scatters R_to_B and (optionally) R_to_D */ 345 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 346 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 347 348 /* Setup local neumann solver ksp_R */ 349 /* PCBDDCSetUpLocalScatters should be called first! */ 350 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 351 352 /* Change global null space passed in by the user if change of basis has been requested */ 353 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 354 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 355 } 356 357 /* 358 Setup local correction and local part of coarse basis. 359 Gives back the dense local part of the coarse matrix in column major ordering 360 */ 361 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 362 363 /* Compute total number of coarse nodes and setup coarse solver */ 364 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 365 366 /* free */ 367 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "PCBDDCResetCustomization" 373 PetscErrorCode PCBDDCResetCustomization(PC pc) 374 { 375 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 376 PetscErrorCode ierr; 377 378 PetscFunctionBegin; 379 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 380 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 381 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 383 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 386 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 388 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 389 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "PCBDDCResetTopography" 395 PetscErrorCode PCBDDCResetTopography(PC pc) 396 { 397 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 398 PetscErrorCode ierr; 399 400 PetscFunctionBegin; 401 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 403 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 404 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 405 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "PCBDDCResetSolvers" 411 PetscErrorCode PCBDDCResetSolvers(PC pc) 412 { 413 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 414 PetscScalar *array; 415 PetscErrorCode ierr; 416 417 PetscFunctionBegin; 418 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 419 if (pcbddc->coarse_phi_B) { 420 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 421 ierr = PetscFree(array);CHKERRQ(ierr); 422 } 423 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 424 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 425 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 426 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 427 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 428 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 429 if (pcbddc->local_auxmat2) { 430 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 431 ierr = PetscFree(array);CHKERRQ(ierr); 432 } 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 447 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 454 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 455 { 456 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 457 PC_IS *pcis = (PC_IS*)pc->data; 458 VecType impVecType; 459 PetscInt n_constraints,n_R,old_size; 460 PetscErrorCode ierr; 461 462 PetscFunctionBegin; 463 if (!pcbddc->ConstraintMatrix) { 464 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 465 } 466 /* get sizes */ 467 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 468 n_R = pcis->n-pcbddc->n_actual_vertices; 469 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 470 /* local work vectors (try to avoid unneeded work)*/ 471 /* R nodes */ 472 old_size = -1; 473 if (pcbddc->vec1_R) { 474 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 475 } 476 if (n_R != old_size) { 477 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 478 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 479 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 480 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 481 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 482 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 483 } 484 /* local primal dofs */ 485 old_size = -1; 486 if (pcbddc->vec1_P) { 487 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 488 } 489 if (pcbddc->local_primal_size != old_size) { 490 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 491 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 493 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 494 } 495 /* local explicit constraints */ 496 old_size = -1; 497 if (pcbddc->vec1_C) { 498 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 499 } 500 if (n_constraints && n_constraints != old_size) { 501 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 502 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 504 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 505 } 506 PetscFunctionReturn(0); 507 } 508 509 #undef __FUNCT__ 510 #define __FUNCT__ "PCBDDCSetUpCorrection" 511 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 512 { 513 PetscErrorCode ierr; 514 /* pointers to pcis and pcbddc */ 515 PC_IS* pcis = (PC_IS*)pc->data; 516 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 517 /* submatrices of local problem */ 518 Mat A_RV,A_VR,A_VV; 519 /* submatrices of local coarse problem */ 520 Mat S_VV,S_CV,S_VC,S_CC; 521 /* working matrices */ 522 Mat C_CR; 523 /* additional working stuff */ 524 PC pc_R; 525 Mat F; 526 PetscBool isLU,isCHOL,isILU; 527 528 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 529 PetscScalar *work; 530 PetscInt *idx_V_B; 531 PetscInt n,n_vertices,n_constraints; 532 PetscInt i,n_R,n_D,n_B; 533 PetscBool unsymmetric_check; 534 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 535 MatType impMatType; 536 /* some shortcuts to scalars */ 537 PetscScalar one=1.0,m_one=-1.0; 538 539 PetscFunctionBegin; 540 /* get number of vertices (corners plus constraints with change of basis) 541 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 542 n_vertices = pcbddc->n_actual_vertices; 543 n_constraints = pcbddc->local_primal_size-n_vertices; 544 /* Set Non-overlapping dimensions */ 545 n_B = pcis->n_B; n_D = pcis->n - n_B; 546 n_R = pcis->n-n_vertices; 547 548 /* Set types for local objects needed by BDDC precondtioner */ 549 impMatType = MATSEQDENSE; 550 551 /* vertices in boundary numbering */ 552 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 553 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 554 if (i != n_vertices) { 555 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 556 } 557 558 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 559 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 561 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 563 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 565 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 567 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 568 569 unsymmetric_check = PETSC_FALSE; 570 /* allocate workspace */ 571 n = 0; 572 if (n_constraints) { 573 n += n_R*n_constraints; 574 } 575 if (n_vertices) { 576 n = PetscMax(2*n_R*n_vertices,n); 577 } 578 if (!pcbddc->issym) { 579 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 580 unsymmetric_check = PETSC_TRUE; 581 } 582 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 583 584 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 585 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 586 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 589 if (isLU || isILU || isCHOL) { 590 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 591 } else { 592 F = NULL; 593 } 594 595 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 596 if (n_constraints) { 597 Mat M1,M2,M3; 598 IS is_aux; 599 /* see if we can save some allocations */ 600 if (pcbddc->local_auxmat2) { 601 PetscInt on_R,on_constraints; 602 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 603 if (on_R != n_R || on_constraints != n_constraints) { 604 PetscScalar *marray; 605 606 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 607 ierr = PetscFree(marray);CHKERRQ(ierr); 608 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 609 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 610 } 611 } 612 /* auxiliary matrices */ 613 if (!pcbddc->local_auxmat2) { 614 PetscScalar *marray; 615 616 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 617 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 618 marray += n_R*n_constraints; 619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 620 } 621 622 /* Extract constraints on R nodes: C_{CR} */ 623 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 624 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 625 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 626 627 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 628 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 629 for (i=0;i<n_constraints;i++) { 630 const PetscScalar *row_cmat_values; 631 const PetscInt *row_cmat_indices; 632 PetscInt size_of_constraint,j; 633 634 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 635 for (j=0;j<size_of_constraint;j++) { 636 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 637 } 638 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 } 640 if (F) { 641 Mat B; 642 643 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 644 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 645 ierr = MatDestroy(&B);CHKERRQ(ierr); 646 } else { 647 PetscScalar *xarray; 648 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 649 for (i=0;i<n_constraints;i++) { 650 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 651 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 652 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 653 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 654 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 655 } 656 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 657 } 658 659 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 660 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 661 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 662 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 663 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 664 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 665 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 666 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 667 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 668 ierr = MatDestroy(&M2);CHKERRQ(ierr); 669 ierr = MatDestroy(&M3);CHKERRQ(ierr); 670 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 671 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 672 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 673 ierr = MatDestroy(&M1);CHKERRQ(ierr); 674 } 675 /* Get submatrices from subdomain matrix */ 676 if (n_vertices) { 677 Mat newmat; 678 IS is_aux; 679 PetscInt ibs,mbs; 680 PetscBool issbaij; 681 682 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 683 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 684 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 685 if (ibs != mbs) { /* need to convert to SEQAIJ */ 686 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 687 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 688 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 689 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 690 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 691 } else { 692 /* this is safe */ 693 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 694 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 695 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 696 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 697 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 698 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 699 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 700 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 701 } else { 702 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 703 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 704 } 705 } 706 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 707 } 708 709 /* Matrix of coarse basis functions (local) */ 710 if (pcbddc->coarse_phi_B) { 711 PetscInt on_B,on_primal,on_D=n_D; 712 if (pcbddc->coarse_phi_D) { 713 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 714 } 715 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 716 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 717 PetscScalar *marray; 718 719 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 720 ierr = PetscFree(marray);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 723 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 724 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 725 } 726 } 727 728 if (!pcbddc->coarse_phi_B) { 729 PetscScalar *marray; 730 731 n = n_B*pcbddc->local_primal_size; 732 if (pcbddc->switch_static || pcbddc->dbg_flag) { 733 n += n_D*pcbddc->local_primal_size; 734 } 735 if (!pcbddc->issym) { 736 n *= 2; 737 } 738 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 739 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 740 n = n_B*pcbddc->local_primal_size; 741 if (pcbddc->switch_static || pcbddc->dbg_flag) { 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 743 n += n_D*pcbddc->local_primal_size; 744 } 745 if (!pcbddc->issym) { 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 747 if (pcbddc->switch_static || pcbddc->dbg_flag) { 748 n = n_B*pcbddc->local_primal_size; 749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 750 } 751 } else { 752 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 753 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 754 if (pcbddc->switch_static || pcbddc->dbg_flag) { 755 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 756 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 757 } 758 } 759 } 760 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 761 /* vertices */ 762 if (n_vertices) { 763 764 if (n_R) { 765 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 766 PetscBLASInt B_N,B_one = 1; 767 PetscScalar *x,*y; 768 769 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 770 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 771 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 772 if (F) { 773 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 774 } else { 775 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 776 for (i=0;i<n_vertices;i++) { 777 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 778 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 779 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 781 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 782 } 783 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 784 } 785 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 786 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 787 if (n_constraints) { 788 Mat B; 789 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 790 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 791 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 792 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 793 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 794 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 795 ierr = MatDestroy(&B);CHKERRQ(ierr); 796 } 797 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 798 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 799 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 800 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 801 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 802 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 803 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 804 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 805 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 806 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 807 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 808 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 809 } else { 810 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 811 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 812 } 813 /* coarse basis functions */ 814 for (i=0;i<n_vertices;i++) { 815 PetscScalar *y; 816 817 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 818 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 819 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 820 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 y[n_B*i+idx_V_B[i]] = 1.0; 823 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 824 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 825 826 if (pcbddc->switch_static || pcbddc->dbg_flag) { 827 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 828 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 829 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 832 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 833 } 834 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 835 } 836 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 837 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 838 } 839 840 if (n_constraints) { 841 Mat B; 842 843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 844 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 845 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 846 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 847 if (n_vertices) { 848 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 849 } 850 ierr = MatDestroy(&B);CHKERRQ(ierr); 851 /* coarse basis functions */ 852 for (i=0;i<n_constraints;i++) { 853 PetscScalar *y; 854 855 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 856 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 857 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 858 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 861 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 862 863 if (pcbddc->switch_static || pcbddc->dbg_flag) { 864 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 865 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 866 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 869 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 870 } 871 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 872 } 873 } 874 875 /* compute other basis functions for non-symmetric problems */ 876 if (!pcbddc->issym) { 877 Mat B,X; 878 879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 880 881 if (n_constraints) { 882 Mat S_CCT,B_C; 883 884 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 885 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 886 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 887 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 888 if (n_vertices) { 889 Mat B_V,S_VCT; 890 891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 892 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 893 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 894 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 895 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 896 } 897 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 898 } 899 if (n_vertices && n_R) { 900 Mat A_VRT; 901 PetscBLASInt B_N,B_one = 1; 902 903 if (!n_constraints) { /* if there are no constraints, reset work */ 904 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 905 } 906 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 907 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 908 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 909 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 910 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 911 } 912 913 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 914 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 915 for (i=0;i<pcbddc->local_primal_size;i++) { 916 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 917 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 918 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 920 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 921 } 922 } else { 923 for (i=0;i<pcbddc->local_primal_size;i++) { 924 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 925 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 926 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 927 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 928 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 929 } 930 } 931 ierr = MatDestroy(&B);CHKERRQ(ierr); 932 /* coarse basis functions */ 933 for (i=0;i<pcbddc->local_primal_size;i++) { 934 PetscScalar *y; 935 936 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 937 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 938 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 939 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 if (i<n_vertices) { 942 y[n_B*i+idx_V_B[i]] = 1.0; 943 } 944 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 945 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 946 947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 948 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 949 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 950 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 953 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 954 } 955 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 956 } 957 ierr = MatDestroy(&X);CHKERRQ(ierr); 958 } 959 960 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 961 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 962 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 963 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 964 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 965 /* Checking coarse_sub_mat and coarse basis functios */ 966 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 967 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 968 if (pcbddc->dbg_flag) { 969 Mat coarse_sub_mat; 970 Mat AUXMAT,TM1,TM2,TM3,TM4; 971 Mat coarse_phi_D,coarse_phi_B; 972 Mat coarse_psi_D,coarse_psi_B; 973 Mat A_II,A_BB,A_IB,A_BI; 974 MatType checkmattype=MATSEQAIJ; 975 PetscReal real_value; 976 977 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 979 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 980 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 981 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 982 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 983 if (unsymmetric_check) { 984 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 985 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 986 } 987 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 988 989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 990 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 992 if (unsymmetric_check) { 993 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 994 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 995 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 996 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 997 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 998 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 999 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1001 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1002 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1003 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1004 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1005 } else { 1006 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1007 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1008 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1010 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1011 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1012 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1013 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1014 } 1015 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1017 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1018 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1019 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1020 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 1021 1022 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1023 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1024 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1025 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1026 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1027 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1028 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1029 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1030 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1031 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1032 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1033 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1034 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1035 if (unsymmetric_check) { 1036 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1037 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1038 } 1039 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1040 } 1041 1042 /* free memory */ 1043 ierr = PetscFree(work);CHKERRQ(ierr); 1044 if (n_vertices) { 1045 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1046 } 1047 if (n_constraints) { 1048 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1049 } 1050 /* get back data */ 1051 *coarse_submat_vals_n = coarse_submat_vals; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 #undef __FUNCT__ 1056 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1057 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1058 { 1059 Mat *work_mat; 1060 IS isrow_s,iscol_s; 1061 PetscBool rsorted,csorted; 1062 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1063 PetscErrorCode ierr; 1064 1065 PetscFunctionBegin; 1066 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1067 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1068 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1069 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1070 1071 if (!rsorted) { 1072 const PetscInt *idxs; 1073 PetscInt *idxs_sorted,i; 1074 1075 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1077 for (i=0;i<rsize;i++) { 1078 idxs_perm_r[i] = i; 1079 } 1080 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1081 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1082 for (i=0;i<rsize;i++) { 1083 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1084 } 1085 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1087 } else { 1088 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1089 isrow_s = isrow; 1090 } 1091 1092 if (!csorted) { 1093 if (isrow == iscol) { 1094 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1095 iscol_s = isrow_s; 1096 } else { 1097 const PetscInt *idxs; 1098 PetscInt *idxs_sorted,i; 1099 1100 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1101 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1102 for (i=0;i<csize;i++) { 1103 idxs_perm_c[i] = i; 1104 } 1105 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1106 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1107 for (i=0;i<csize;i++) { 1108 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1109 } 1110 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1111 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1112 } 1113 } else { 1114 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1115 iscol_s = iscol; 1116 } 1117 1118 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1119 1120 if (!rsorted || !csorted) { 1121 Mat new_mat; 1122 IS is_perm_r,is_perm_c; 1123 1124 if (!rsorted) { 1125 PetscInt *idxs_r,i; 1126 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1127 for (i=0;i<rsize;i++) { 1128 idxs_r[idxs_perm_r[i]] = i; 1129 } 1130 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1131 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1132 } else { 1133 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1134 } 1135 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1136 1137 if (!csorted) { 1138 if (isrow_s == iscol_s) { 1139 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1140 is_perm_c = is_perm_r; 1141 } else { 1142 PetscInt *idxs_c,i; 1143 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1144 for (i=0;i<csize;i++) { 1145 idxs_c[idxs_perm_c[i]] = i; 1146 } 1147 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1148 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1149 } 1150 } else { 1151 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1152 } 1153 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1154 1155 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1156 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1157 work_mat[0] = new_mat; 1158 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1159 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1160 } 1161 1162 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1163 *B = work_mat[0]; 1164 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1165 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1166 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1172 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1173 { 1174 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1175 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1176 Mat new_mat; 1177 IS is_local,is_global; 1178 PetscInt local_size; 1179 PetscBool isseqaij; 1180 PetscErrorCode ierr; 1181 1182 PetscFunctionBegin; 1183 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1184 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1185 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1186 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1187 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1188 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1189 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1190 1191 /* check */ 1192 if (pcbddc->dbg_flag) { 1193 Vec x,x_change; 1194 PetscReal error; 1195 1196 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1197 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1198 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1199 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1201 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1202 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1204 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1205 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1206 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1207 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1208 ierr = VecDestroy(&x);CHKERRQ(ierr); 1209 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1210 } 1211 1212 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1213 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1214 if (isseqaij) { 1215 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1216 } else { 1217 Mat work_mat; 1218 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1219 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1220 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1221 } 1222 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1223 #if !defined(PETSC_USE_COMPLEX) 1224 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1225 #endif 1226 /* 1227 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1228 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1229 */ 1230 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1236 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1237 { 1238 PC_IS* pcis = (PC_IS*)(pc->data); 1239 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1240 IS is_aux1,is_aux2; 1241 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1242 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1243 PetscInt vbs,bs; 1244 PetscBT bitmask; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 /* 1249 No need to setup local scatters if 1250 - primal space is unchanged 1251 AND 1252 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1253 AND 1254 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1255 */ 1256 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1257 PetscFunctionReturn(0); 1258 } 1259 /* destroy old objects */ 1260 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1261 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1262 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1263 /* Set Non-overlapping dimensions */ 1264 n_B = pcis->n_B; n_D = pcis->n - n_B; 1265 n_vertices = pcbddc->n_actual_vertices; 1266 /* create auxiliary bitmask */ 1267 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1268 for (i=0;i<n_vertices;i++) { 1269 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1270 } 1271 1272 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1273 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1274 for (i=0, n_R=0; i<pcis->n; i++) { 1275 if (!PetscBTLookup(bitmask,i)) { 1276 idx_R_local[n_R] = i; 1277 n_R++; 1278 } 1279 } 1280 1281 /* Block code */ 1282 vbs = 1; 1283 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1284 if (bs>1 && !(n_vertices%bs)) { 1285 PetscBool is_blocked = PETSC_TRUE; 1286 PetscInt *vary; 1287 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1288 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1289 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1290 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1291 for (i=0; i<n_vertices/bs; i++) { 1292 if (vary[i]!=0 && vary[i]!=bs) { 1293 is_blocked = PETSC_FALSE; 1294 break; 1295 } 1296 } 1297 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1298 vbs = bs; 1299 for (i=0;i<n_R/vbs;i++) { 1300 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1301 } 1302 } 1303 ierr = PetscFree(vary);CHKERRQ(ierr); 1304 } 1305 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1306 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1307 1308 /* print some info if requested */ 1309 if (pcbddc->dbg_flag) { 1310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1311 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1313 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1318 } 1319 1320 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1321 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1322 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1323 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1324 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 for (i=0; i<n_D; i++) { 1326 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1327 } 1328 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1329 for (i=0, j=0; i<n_R; i++) { 1330 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1331 aux_array1[j++] = i; 1332 } 1333 } 1334 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1335 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1336 for (i=0, j=0; i<n_B; i++) { 1337 if (!PetscBTLookup(bitmask,is_indices[i])) { 1338 aux_array2[j++] = i; 1339 } 1340 } 1341 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1342 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1343 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1344 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1345 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1346 1347 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1348 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1349 for (i=0, j=0; i<n_R; i++) { 1350 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1351 aux_array1[j++] = i; 1352 } 1353 } 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1355 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1356 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1357 } 1358 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1359 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1366 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1367 { 1368 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1369 PC_IS *pcis = (PC_IS*)pc->data; 1370 PC pc_temp; 1371 Mat A_RR; 1372 MatReuse reuse; 1373 PetscScalar m_one = -1.0; 1374 PetscReal value; 1375 PetscInt n_D,n_R,ibs,mbs; 1376 PetscBool use_exact,use_exact_reduced,issbaij; 1377 PetscErrorCode ierr; 1378 /* prefixes stuff */ 1379 char dir_prefix[256],neu_prefix[256],str_level[16]; 1380 size_t len; 1381 1382 PetscFunctionBegin; 1383 1384 /* compute prefixes */ 1385 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1386 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1387 if (!pcbddc->current_level) { 1388 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1389 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1390 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1391 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1392 } else { 1393 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1394 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1395 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1396 len -= 15; /* remove "pc_bddc_coarse_" */ 1397 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1398 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1399 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1400 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1401 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1402 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1403 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1404 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1405 } 1406 1407 /* DIRICHLET PROBLEM */ 1408 if (dirichlet) { 1409 if (pcbddc->issym) { 1410 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1411 } 1412 /* Matrix for Dirichlet problem is pcis->A_II */ 1413 n_D = pcis->n - pcis->n_B; 1414 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1415 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1416 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1417 /* default */ 1418 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1419 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1420 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1421 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1422 if (issbaij) { 1423 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1424 } else { 1425 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1426 } 1427 /* Allow user's customization */ 1428 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1429 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1430 } 1431 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1432 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1433 if (!n_D) { 1434 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1435 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1436 } 1437 /* Set Up KSP for Dirichlet problem of BDDC */ 1438 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1439 /* set ksp_D into pcis data */ 1440 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1441 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1442 pcis->ksp_D = pcbddc->ksp_D; 1443 } 1444 1445 /* NEUMANN PROBLEM */ 1446 A_RR = 0; 1447 if (neumann) { 1448 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1449 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1450 if (pcbddc->ksp_R) { /* already created ksp */ 1451 PetscInt nn_R; 1452 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1453 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1454 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1455 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1456 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1457 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1458 reuse = MAT_INITIAL_MATRIX; 1459 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1460 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1461 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1462 reuse = MAT_INITIAL_MATRIX; 1463 } else { /* safe to reuse the matrix */ 1464 reuse = MAT_REUSE_MATRIX; 1465 } 1466 } 1467 /* last check */ 1468 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1469 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1470 reuse = MAT_INITIAL_MATRIX; 1471 } 1472 } else { /* first time, so we need to create the matrix */ 1473 reuse = MAT_INITIAL_MATRIX; 1474 } 1475 /* extract A_RR */ 1476 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1477 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1478 if (ibs != mbs) { 1479 Mat newmat; 1480 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1481 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1482 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1483 } else { 1484 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1485 } 1486 if (pcbddc->issym) { 1487 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1488 } 1489 if (!pcbddc->ksp_R) { /* create object if not present */ 1490 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1491 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1492 /* default */ 1493 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1494 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1495 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1496 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1497 if (issbaij) { 1498 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1499 } else { 1500 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1501 } 1502 /* Allow user's customization */ 1503 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1504 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1505 } 1506 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1507 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1508 if (!n_R) { 1509 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1510 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1511 } 1512 /* Set Up KSP for Neumann problem of BDDC */ 1513 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1514 } 1515 1516 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1517 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1518 if (pcbddc->dbg_flag) { 1519 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1520 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1521 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1522 } 1523 if (dirichlet) { /* Dirichlet */ 1524 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1525 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1526 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1527 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1528 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1529 /* need to be adapted? */ 1530 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1531 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1532 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1533 /* print info */ 1534 if (pcbddc->dbg_flag) { 1535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1537 } 1538 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1539 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1540 } 1541 } 1542 if (neumann) { /* Neumann */ 1543 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1544 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1545 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1546 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1547 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1548 /* need to be adapted? */ 1549 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1550 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1551 /* print info */ 1552 if (pcbddc->dbg_flag) { 1553 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1554 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1555 } 1556 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1557 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1558 } 1559 } 1560 } 1561 /* free Neumann problem's matrix */ 1562 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1568 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1569 { 1570 PetscErrorCode ierr; 1571 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1572 1573 PetscFunctionBegin; 1574 if (applytranspose) { 1575 if (pcbddc->local_auxmat1) { 1576 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1577 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1578 } 1579 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1580 } else { 1581 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1582 if (pcbddc->local_auxmat1) { 1583 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1584 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1585 } 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 1590 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1593 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1594 { 1595 PetscErrorCode ierr; 1596 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1597 PC_IS* pcis = (PC_IS*) (pc->data); 1598 const PetscScalar zero = 0.0; 1599 1600 PetscFunctionBegin; 1601 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1602 if (applytranspose) { 1603 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1604 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1605 } else { 1606 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1607 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1608 } 1609 /* start communications from local primal nodes to rhs of coarse solver */ 1610 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1611 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 1614 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1615 /* TODO remove null space when doing multilevel */ 1616 if (pcbddc->coarse_ksp) { 1617 Vec rhs,sol; 1618 1619 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1620 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1621 if (applytranspose) { 1622 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1623 } else { 1624 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1625 } 1626 } 1627 1628 /* Local solution on R nodes */ 1629 if (pcis->n) { 1630 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1631 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1632 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1633 if (pcbddc->switch_static) { 1634 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 } 1637 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1638 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1639 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 if (pcbddc->switch_static) { 1642 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1643 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1644 } 1645 } 1646 1647 /* communications from coarse sol to local primal nodes */ 1648 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1649 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1650 1651 /* Sum contributions from two levels */ 1652 if (applytranspose) { 1653 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1654 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1655 } else { 1656 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1657 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1664 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1665 { 1666 PetscErrorCode ierr; 1667 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1668 PetscScalar *array; 1669 Vec from,to; 1670 1671 PetscFunctionBegin; 1672 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1673 from = pcbddc->coarse_vec; 1674 to = pcbddc->vec1_P; 1675 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1676 Vec tvec; 1677 1678 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1679 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1680 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1681 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1682 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1683 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1684 } 1685 } else { /* from local to global -> put data in coarse right hand side */ 1686 from = pcbddc->vec1_P; 1687 to = pcbddc->coarse_vec; 1688 } 1689 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1695 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1696 { 1697 PetscErrorCode ierr; 1698 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1699 PetscScalar *array; 1700 Vec from,to; 1701 1702 PetscFunctionBegin; 1703 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1704 from = pcbddc->coarse_vec; 1705 to = pcbddc->vec1_P; 1706 } else { /* from local to global -> put data in coarse right hand side */ 1707 from = pcbddc->vec1_P; 1708 to = pcbddc->coarse_vec; 1709 } 1710 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1711 if (smode == SCATTER_FORWARD) { 1712 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1713 Vec tvec; 1714 1715 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1716 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1717 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1718 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1719 } 1720 } else { 1721 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1722 ierr = VecResetArray(from);CHKERRQ(ierr); 1723 } 1724 } 1725 PetscFunctionReturn(0); 1726 } 1727 1728 /* uncomment for testing purposes */ 1729 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1732 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1733 { 1734 PetscErrorCode ierr; 1735 PC_IS* pcis = (PC_IS*)(pc->data); 1736 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1737 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1738 /* one and zero */ 1739 PetscScalar one=1.0,zero=0.0; 1740 /* space to store constraints and their local indices */ 1741 PetscScalar *temp_quadrature_constraint; 1742 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1743 /* iterators */ 1744 PetscInt i,j,k,total_counts,temp_start_ptr; 1745 /* BLAS integers */ 1746 PetscBLASInt lwork,lierr; 1747 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1748 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1749 /* reuse */ 1750 PetscInt olocal_primal_size; 1751 PetscInt *oprimal_indices_local_idxs; 1752 /* change of basis */ 1753 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1754 PetscBool boolforchange,qr_needed; 1755 PetscBT touched,change_basis,qr_needed_idx; 1756 /* auxiliary stuff */ 1757 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1758 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1759 PetscScalar *temp_quadrature_constraint_work=NULL; 1760 /* some quantities */ 1761 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1762 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1763 1764 PetscFunctionBegin; 1765 /* Destroy Mat objects computed previously */ 1766 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1767 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1768 1769 /* print some info */ 1770 if (pcbddc->dbg_flag) { 1771 IS vertices; 1772 PetscInt nv,nedges,nfaces; 1773 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1774 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1775 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1776 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1778 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1779 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1780 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1781 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1782 } 1783 1784 if (!pcbddc->adaptive_selection) { 1785 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1786 MatNullSpace nearnullsp; 1787 const Vec *nearnullvecs; 1788 Vec *localnearnullsp; 1789 PetscScalar *array; 1790 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1791 PetscBool nnsp_has_cnst; 1792 /* LAPACK working arrays for SVD or POD */ 1793 PetscBool skip_lapack; 1794 PetscScalar *work; 1795 PetscReal *singular_vals; 1796 #if defined(PETSC_USE_COMPLEX) 1797 PetscReal *rwork; 1798 #endif 1799 #if defined(PETSC_MISSING_LAPACK_GESVD) 1800 PetscScalar *temp_basis,*correlation_mat; 1801 #else 1802 PetscBLASInt dummy_int=1; 1803 PetscScalar dummy_scalar=1.; 1804 #endif 1805 1806 /* Get index sets for faces, edges and vertices from graph */ 1807 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1808 /* free unneeded index sets */ 1809 if (!pcbddc->use_vertices) { 1810 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1811 } 1812 if (!pcbddc->use_edges) { 1813 for (i=0;i<n_ISForEdges;i++) { 1814 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1815 } 1816 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1817 n_ISForEdges = 0; 1818 } 1819 if (!pcbddc->use_faces) { 1820 for (i=0;i<n_ISForFaces;i++) { 1821 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1822 } 1823 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1824 n_ISForFaces = 0; 1825 } 1826 /* HACKS (the following two blocks of code) */ 1827 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1828 pcbddc->use_change_of_basis = PETSC_TRUE; 1829 if (!ISForEdges) { 1830 pcbddc->use_change_on_faces = PETSC_TRUE; 1831 } 1832 } 1833 if (pcbddc->NullSpace) { 1834 /* use_change_of_basis should be consistent among processors */ 1835 PetscBool tbool[2],gbool[2]; 1836 tbool [0] = pcbddc->use_change_of_basis; 1837 tbool [1] = pcbddc->use_change_on_faces; 1838 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1839 pcbddc->use_change_of_basis = gbool[0]; 1840 pcbddc->use_change_on_faces = gbool[1]; 1841 } 1842 1843 /* check if near null space is attached to global mat */ 1844 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1845 if (nearnullsp) { 1846 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1847 /* remove any stored info */ 1848 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1849 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1850 /* store information for BDDC solver reuse */ 1851 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1852 pcbddc->onearnullspace = nearnullsp; 1853 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1854 for (i=0;i<nnsp_size;i++) { 1855 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1856 } 1857 } else { /* if near null space is not provided BDDC uses constants by default */ 1858 nnsp_size = 0; 1859 nnsp_has_cnst = PETSC_TRUE; 1860 } 1861 /* get max number of constraints on a single cc */ 1862 max_constraints = nnsp_size; 1863 if (nnsp_has_cnst) max_constraints++; 1864 1865 /* 1866 Evaluate maximum storage size needed by the procedure 1867 - temp_indices will contain start index of each constraint stored as follows 1868 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1869 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1870 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1871 */ 1872 total_counts = n_ISForFaces+n_ISForEdges; 1873 total_counts *= max_constraints; 1874 n_vertices = 0; 1875 if (ISForVertices) { 1876 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1877 } 1878 total_counts += n_vertices; 1879 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1880 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1881 total_counts = 0; 1882 max_size_of_constraint = 0; 1883 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1884 if (i<n_ISForEdges) { 1885 used_IS = &ISForEdges[i]; 1886 } else { 1887 used_IS = &ISForFaces[i-n_ISForEdges]; 1888 } 1889 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1890 total_counts += j; 1891 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1892 } 1893 total_counts *= max_constraints; 1894 total_counts += n_vertices; 1895 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1896 /* get local part of global near null space vectors */ 1897 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1898 for (k=0;k<nnsp_size;k++) { 1899 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1900 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1901 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 } 1903 1904 /* whether or not to skip lapack calls */ 1905 skip_lapack = PETSC_TRUE; 1906 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1907 1908 /* allocate some auxiliary stuff */ 1909 if (!skip_lapack || pcbddc->use_qr_single) { 1910 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1911 } else { 1912 gidxs = NULL; 1913 permutation = NULL; 1914 temp_indices_to_constraint_work = NULL; 1915 temp_quadrature_constraint_work = NULL; 1916 } 1917 1918 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1919 if (!skip_lapack) { 1920 PetscScalar temp_work; 1921 1922 #if defined(PETSC_MISSING_LAPACK_GESVD) 1923 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1924 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1925 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1926 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1927 #if defined(PETSC_USE_COMPLEX) 1928 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1929 #endif 1930 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1931 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1932 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1933 lwork = -1; 1934 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1935 #if !defined(PETSC_USE_COMPLEX) 1936 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1937 #else 1938 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1939 #endif 1940 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1941 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1942 #else /* on missing GESVD */ 1943 /* SVD */ 1944 PetscInt max_n,min_n; 1945 max_n = max_size_of_constraint; 1946 min_n = max_constraints; 1947 if (max_size_of_constraint < max_constraints) { 1948 min_n = max_size_of_constraint; 1949 max_n = max_constraints; 1950 } 1951 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1952 #if defined(PETSC_USE_COMPLEX) 1953 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1954 #endif 1955 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1956 lwork = -1; 1957 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1958 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1959 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1960 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1961 #if !defined(PETSC_USE_COMPLEX) 1962 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1963 #else 1964 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1965 #endif 1966 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1967 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1968 #endif /* on missing GESVD */ 1969 /* Allocate optimal workspace */ 1970 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1971 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1972 } 1973 /* Now we can loop on constraining sets */ 1974 total_counts = 0; 1975 temp_indices[0] = 0; 1976 /* vertices */ 1977 if (ISForVertices) { 1978 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1979 if (nnsp_has_cnst) { /* consider all vertices */ 1980 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1981 for (i=0;i<n_vertices;i++) { 1982 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1983 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1984 total_counts++; 1985 } 1986 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1987 PetscBool used_vertex; 1988 for (i=0;i<n_vertices;i++) { 1989 used_vertex = PETSC_FALSE; 1990 k = 0; 1991 while (!used_vertex && k<nnsp_size) { 1992 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1993 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1994 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1995 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1996 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1997 total_counts++; 1998 used_vertex = PETSC_TRUE; 1999 } 2000 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2001 k++; 2002 } 2003 } 2004 } 2005 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2006 n_vertices = total_counts; 2007 } 2008 2009 /* edges and faces */ 2010 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2011 if (ncc<n_ISForEdges) { 2012 used_IS = &ISForEdges[ncc]; 2013 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2014 } else { 2015 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2016 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2017 } 2018 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2019 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2020 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2021 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2022 /* change of basis should not be performed on local periodic nodes */ 2023 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2024 if (nnsp_has_cnst) { 2025 PetscScalar quad_value; 2026 temp_constraints++; 2027 if (!pcbddc->use_nnsp_true) { 2028 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2029 } else { 2030 quad_value = 1.0; 2031 } 2032 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (j=0;j<size_of_constraint;j++) { 2034 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2035 } 2036 /* sort by global ordering if using lapack subroutines (not needed!) */ 2037 if (!skip_lapack || pcbddc->use_qr_single) { 2038 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2039 for (j=0;j<size_of_constraint;j++) { 2040 permutation[j]=j; 2041 } 2042 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2043 for (j=0;j<size_of_constraint;j++) { 2044 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2045 } 2046 for (j=0;j<size_of_constraint;j++) { 2047 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2048 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2049 } 2050 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2051 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2052 } 2053 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2054 total_counts++; 2055 } 2056 for (k=0;k<nnsp_size;k++) { 2057 PetscReal real_value; 2058 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2059 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2060 for (j=0;j<size_of_constraint;j++) { 2061 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2062 } 2063 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2064 /* check if array is null on the connected component */ 2065 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2066 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2067 if (real_value > 0.0) { /* keep indices and values */ 2068 /* sort by global ordering if using lapack subroutines */ 2069 if (!skip_lapack || pcbddc->use_qr_single) { 2070 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2071 for (j=0;j<size_of_constraint;j++) { 2072 permutation[j]=j; 2073 } 2074 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2075 for (j=0;j<size_of_constraint;j++) { 2076 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2077 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2078 } 2079 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2080 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2081 } 2082 temp_constraints++; 2083 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2084 total_counts++; 2085 } 2086 } 2087 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2088 valid_constraints = temp_constraints; 2089 if (!pcbddc->use_nnsp_true && temp_constraints) { 2090 if (temp_constraints == 1) { /* just normalize the constraint */ 2091 PetscScalar norm; 2092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2093 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2094 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2095 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2096 } else { /* perform SVD */ 2097 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2098 2099 #if defined(PETSC_MISSING_LAPACK_GESVD) 2100 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2101 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2102 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2103 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2104 from that computed using LAPACKgesvd 2105 -> This is due to a different computation of eigenvectors in LAPACKheev 2106 -> The quality of the POD-computed basis will be the same */ 2107 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2108 /* Store upper triangular part of correlation matrix */ 2109 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2110 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2111 for (j=0;j<temp_constraints;j++) { 2112 for (k=0;k<j+1;k++) { 2113 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2114 } 2115 } 2116 /* compute eigenvalues and eigenvectors of correlation matrix */ 2117 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2118 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2119 #if !defined(PETSC_USE_COMPLEX) 2120 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2121 #else 2122 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2123 #endif 2124 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2125 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2126 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2127 j = 0; 2128 while (j < temp_constraints && singular_vals[j] < tol) j++; 2129 total_counts = total_counts-j; 2130 valid_constraints = temp_constraints-j; 2131 /* scale and copy POD basis into used quadrature memory */ 2132 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2133 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2134 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2136 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2137 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2138 if (j<temp_constraints) { 2139 PetscInt ii; 2140 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2141 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2142 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2143 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2144 for (k=0;k<temp_constraints-j;k++) { 2145 for (ii=0;ii<size_of_constraint;ii++) { 2146 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2147 } 2148 } 2149 } 2150 #else /* on missing GESVD */ 2151 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2152 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2153 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2154 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2155 #if !defined(PETSC_USE_COMPLEX) 2156 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2157 #else 2158 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2159 #endif 2160 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2161 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2162 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2163 k = temp_constraints; 2164 if (k > size_of_constraint) k = size_of_constraint; 2165 j = 0; 2166 while (j < k && singular_vals[k-j-1] < tol) j++; 2167 valid_constraints = k-j; 2168 total_counts = total_counts-temp_constraints+valid_constraints; 2169 #endif /* on missing GESVD */ 2170 } 2171 } 2172 /* setting change_of_basis flag is safe now */ 2173 if (boolforchange) { 2174 for (j=0;j<valid_constraints;j++) { 2175 PetscBTSet(change_basis,total_counts-j-1); 2176 } 2177 } 2178 } 2179 /* free workspace */ 2180 if (!skip_lapack || pcbddc->use_qr_single) { 2181 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2182 } 2183 if (!skip_lapack) { 2184 ierr = PetscFree(work);CHKERRQ(ierr); 2185 #if defined(PETSC_USE_COMPLEX) 2186 ierr = PetscFree(rwork);CHKERRQ(ierr); 2187 #endif 2188 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2189 #if defined(PETSC_MISSING_LAPACK_GESVD) 2190 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2191 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2192 #endif 2193 } 2194 for (k=0;k<nnsp_size;k++) { 2195 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2196 } 2197 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2198 /* free index sets of faces, edges and vertices */ 2199 for (i=0;i<n_ISForFaces;i++) { 2200 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2201 } 2202 if (n_ISForFaces) { 2203 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2204 } 2205 for (i=0;i<n_ISForEdges;i++) { 2206 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2207 } 2208 if (n_ISForEdges) { 2209 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2210 } 2211 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2212 } else { 2213 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2214 PetscInt cum = 0; 2215 2216 total_counts = 0; 2217 n_vertices = 0; 2218 if (sub_schurs->is_Ej_com) { 2219 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2220 } 2221 max_constraints = 0; 2222 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2223 total_counts += pcbddc->adaptive_constraints_n[i]; 2224 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2225 } 2226 temp_indices = pcbddc->adaptive_constraints_ptrs; 2227 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2228 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2229 2230 #if 0 2231 printf("Found %d totals\n",total_counts); 2232 for (i=0;i<total_counts;i++) { 2233 printf("const %d, start %d",i,temp_indices[i]); 2234 printf(" end %d:\n",temp_indices[i+1]); 2235 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2236 printf(" idxs %d",temp_indices_to_constraint[j]); 2237 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2238 } 2239 } 2240 for (i=0;i<n_vertices;i++) { 2241 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2242 } 2243 for (i=0;i<sub_schurs->n_subs;i++) { 2244 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2245 } 2246 #endif 2247 2248 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2249 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2250 /* Change of basis */ 2251 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2252 if (pcbddc->use_change_of_basis) { 2253 cum = n_vertices; 2254 for (i=0;i<sub_schurs->n_subs;i++) { 2255 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2256 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2257 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2258 } 2259 } 2260 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2261 } 2262 } 2263 } 2264 2265 /* map temp_indices_to_constraint in boundary numbering */ 2266 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2267 if (i != temp_indices[total_counts]) { 2268 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2269 } 2270 2271 /* set quantities in pcbddc data structure and store previous primal size */ 2272 /* n_vertices defines the number of subdomain corners in the primal space */ 2273 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2274 olocal_primal_size = pcbddc->local_primal_size; 2275 pcbddc->local_primal_size = total_counts; 2276 pcbddc->n_vertices = n_vertices; 2277 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2278 2279 /* Create constraint matrix */ 2280 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2281 /* so we need to set it up properly either with or without change of basis */ 2282 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2283 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2284 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2285 /* array to compute a local numbering of constraints : vertices first then constraints */ 2286 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2287 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2288 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2289 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2290 /* auxiliary stuff for basis change */ 2291 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2292 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2293 2294 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2295 total_primal_vertices=0; 2296 for (i=0;i<pcbddc->local_primal_size;i++) { 2297 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2298 if (size_of_constraint == 1) { 2299 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2300 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2301 aux_primal_minloc[total_primal_vertices]=0; 2302 total_primal_vertices++; 2303 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2304 PetscInt min_loc,min_index; 2305 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2306 /* find first untouched local node */ 2307 k = 0; 2308 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2309 min_index = global_indices[k]; 2310 min_loc = k; 2311 /* search the minimum among global nodes already untouched on the cc */ 2312 for (k=1;k<size_of_constraint;k++) { 2313 /* there can be more than one constraint on a single connected component */ 2314 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2315 min_index = global_indices[k]; 2316 min_loc = k; 2317 } 2318 } 2319 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2320 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2321 aux_primal_minloc[total_primal_vertices]=min_loc; 2322 total_primal_vertices++; 2323 } 2324 } 2325 /* determine if a QR strategy is needed for change of basis */ 2326 qr_needed = PETSC_FALSE; 2327 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2328 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2329 if (PetscBTLookup(change_basis,i)) { 2330 if (!pcbddc->use_qr_single) { 2331 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2332 j = 0; 2333 for (k=0;k<size_of_constraint;k++) { 2334 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2335 j++; 2336 } 2337 } 2338 /* found more than one primal dof on the cc */ 2339 if (j > 1) { 2340 PetscBTSet(qr_needed_idx,i); 2341 qr_needed = PETSC_TRUE; 2342 } 2343 } else { 2344 PetscBTSet(qr_needed_idx,i); 2345 qr_needed = PETSC_TRUE; 2346 } 2347 } 2348 } 2349 /* free workspace */ 2350 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2351 2352 /* permute indices in order to have a sorted set of vertices */ 2353 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2354 2355 /* nonzero structure of constraint matrix */ 2356 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2357 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2358 j=total_primal_vertices; 2359 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2360 if (!PetscBTLookup(change_basis,i)) { 2361 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2362 j++; 2363 } 2364 } 2365 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2366 ierr = PetscFree(nnz);CHKERRQ(ierr); 2367 /* set values in constraint matrix */ 2368 for (i=0;i<total_primal_vertices;i++) { 2369 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2370 } 2371 total_counts = total_primal_vertices; 2372 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2373 if (!PetscBTLookup(change_basis,i)) { 2374 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2375 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2376 total_counts++; 2377 } 2378 } 2379 /* assembling */ 2380 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2381 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2382 /* 2383 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2384 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2385 */ 2386 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2387 if (pcbddc->use_change_of_basis) { 2388 /* dual and primal dofs on a single cc */ 2389 PetscInt dual_dofs,primal_dofs; 2390 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2391 PetscInt primal_counter; 2392 /* working stuff for GEQRF */ 2393 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2394 PetscBLASInt lqr_work; 2395 /* working stuff for UNGQR */ 2396 PetscScalar *gqr_work,lgqr_work_t; 2397 PetscBLASInt lgqr_work; 2398 /* working stuff for TRTRS */ 2399 PetscScalar *trs_rhs; 2400 PetscBLASInt Blas_NRHS; 2401 /* pointers for values insertion into change of basis matrix */ 2402 PetscInt *start_rows,*start_cols; 2403 PetscScalar *start_vals; 2404 /* working stuff for values insertion */ 2405 PetscBT is_primal; 2406 /* matrix sizes */ 2407 PetscInt global_size,local_size; 2408 /* temporary change of basis */ 2409 Mat localChangeOfBasisMatrix; 2410 /* extra space for debugging */ 2411 PetscScalar *dbg_work; 2412 2413 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2414 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2415 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2416 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2417 /* nonzeros for local mat */ 2418 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2419 for (i=0;i<pcis->n;i++) nnz[i]=1; 2420 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2421 if (PetscBTLookup(change_basis,i)) { 2422 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2423 if (PetscBTLookup(qr_needed_idx,i)) { 2424 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2425 } else { 2426 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2427 /* get local primal index on the cc */ 2428 j = 0; 2429 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2430 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2431 } 2432 } 2433 } 2434 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2435 ierr = PetscFree(nnz);CHKERRQ(ierr); 2436 /* Set initial identity in the matrix */ 2437 for (i=0;i<pcis->n;i++) { 2438 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2439 } 2440 2441 if (pcbddc->dbg_flag) { 2442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2443 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2444 } 2445 2446 2447 /* Now we loop on the constraints which need a change of basis */ 2448 /* 2449 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2450 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2451 2452 Basic blocks of change of basis matrix T computed by 2453 2454 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2455 2456 | 1 0 ... 0 s_1/S | 2457 | 0 1 ... 0 s_2/S | 2458 | ... | 2459 | 0 ... 1 s_{n-1}/S | 2460 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2461 2462 with S = \sum_{i=1}^n s_i^2 2463 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2464 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2465 2466 - QR decomposition of constraints otherwise 2467 */ 2468 if (qr_needed) { 2469 /* space to store Q */ 2470 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2471 /* first we issue queries for optimal work */ 2472 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2473 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2474 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2475 lqr_work = -1; 2476 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2478 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2479 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2480 lgqr_work = -1; 2481 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2482 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2484 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2485 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2486 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2487 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2488 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2489 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2490 /* array to store scaling factors for reflectors */ 2491 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2492 /* array to store rhs and solution of triangular solver */ 2493 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2494 /* allocating workspace for check */ 2495 if (pcbddc->dbg_flag) { 2496 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2497 } 2498 } 2499 /* array to store whether a node is primal or not */ 2500 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2501 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2502 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2503 if (i != total_primal_vertices) { 2504 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2505 } 2506 for (i=0;i<total_primal_vertices;i++) { 2507 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2508 } 2509 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2510 2511 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2512 /* -> using implicit ordering contained in temp_indices data */ 2513 total_counts = pcbddc->n_vertices; 2514 primal_counter = total_counts; 2515 while (total_counts<pcbddc->local_primal_size) { 2516 primal_dofs = 1; 2517 if (PetscBTLookup(change_basis,total_counts)) { 2518 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2519 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2520 primal_dofs++; 2521 } 2522 /* get constraint info */ 2523 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2524 dual_dofs = size_of_constraint-primal_dofs; 2525 2526 if (pcbddc->dbg_flag) { 2527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2528 } 2529 2530 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2531 2532 /* copy quadrature constraints for change of basis check */ 2533 if (pcbddc->dbg_flag) { 2534 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2535 } 2536 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2537 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2538 2539 /* compute QR decomposition of constraints */ 2540 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2541 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2542 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2543 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2544 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2545 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2546 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2547 2548 /* explictly compute R^-T */ 2549 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2550 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2551 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2552 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2555 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2556 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2557 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2559 2560 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2563 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2564 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2565 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2566 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2567 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2568 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2569 2570 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2571 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2572 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2574 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2575 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2576 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2579 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2580 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2582 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2583 2584 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2585 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2586 /* insert cols for primal dofs */ 2587 for (j=0;j<primal_dofs;j++) { 2588 start_vals = &qr_basis[j*size_of_constraint]; 2589 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2590 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2591 } 2592 /* insert cols for dual dofs */ 2593 for (j=0,k=0;j<dual_dofs;k++) { 2594 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2595 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2596 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2597 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2598 j++; 2599 } 2600 } 2601 2602 /* check change of basis */ 2603 if (pcbddc->dbg_flag) { 2604 PetscInt ii,jj; 2605 PetscBool valid_qr=PETSC_TRUE; 2606 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2607 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2609 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2610 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2611 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2612 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2613 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2614 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2615 for (jj=0;jj<size_of_constraint;jj++) { 2616 for (ii=0;ii<primal_dofs;ii++) { 2617 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2618 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2619 } 2620 } 2621 if (!valid_qr) { 2622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2623 for (jj=0;jj<size_of_constraint;jj++) { 2624 for (ii=0;ii<primal_dofs;ii++) { 2625 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2626 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2627 } 2628 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2629 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2630 } 2631 } 2632 } 2633 } else { 2634 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2635 } 2636 } 2637 } else { /* simple transformation block */ 2638 PetscInt row,col; 2639 PetscScalar val,norm; 2640 2641 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2642 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2643 for (j=0;j<size_of_constraint;j++) { 2644 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2645 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2646 if (!PetscBTLookup(is_primal,row_B)) { 2647 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2648 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2649 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2650 } else { 2651 for (k=0;k<size_of_constraint;k++) { 2652 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2653 if (row != col) { 2654 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2655 } else { 2656 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2657 } 2658 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2659 } 2660 } 2661 } 2662 if (pcbddc->dbg_flag) { 2663 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2664 } 2665 } 2666 /* increment primal counter */ 2667 primal_counter += primal_dofs; 2668 } else { 2669 if (pcbddc->dbg_flag) { 2670 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2671 } 2672 } 2673 /* increment constraint counter total_counts */ 2674 total_counts += primal_dofs; 2675 } 2676 2677 /* free workspace */ 2678 if (qr_needed) { 2679 if (pcbddc->dbg_flag) { 2680 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2681 } 2682 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2683 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2684 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2685 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2686 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2687 } 2688 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2689 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2690 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2691 2692 /* assembling of global change of variable */ 2693 { 2694 Mat tmat; 2695 PetscInt bs; 2696 2697 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2698 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2699 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2700 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2701 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2702 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2703 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2704 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2705 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2706 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2707 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2708 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2709 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2710 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2711 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2712 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2713 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2714 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2715 } 2716 /* check */ 2717 if (pcbddc->dbg_flag) { 2718 PetscReal error; 2719 Vec x,x_change; 2720 2721 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2722 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2723 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2724 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2725 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2726 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2727 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2728 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2729 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2730 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2731 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2732 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2733 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2734 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2735 ierr = VecDestroy(&x);CHKERRQ(ierr); 2736 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2737 } 2738 2739 /* adapt sub_schurs computed (if any) */ 2740 if (pcbddc->use_deluxe_scaling) { 2741 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2742 if (sub_schurs->n_subs_par_g) { 2743 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2744 } 2745 if (sub_schurs->S_Ej_all) { 2746 Mat S_1,S_2,tmat; 2747 IS is_all_N; 2748 2749 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2750 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2751 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2752 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2753 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2754 sub_schurs->S_Ej_all = S_1; 2755 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2756 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2757 sub_schurs->sum_S_Ej_all = S_2; 2758 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2759 } 2760 } 2761 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2762 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2763 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2764 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2765 } 2766 2767 /* set up change of basis context */ 2768 if (pcbddc->ChangeOfBasisMatrix) { 2769 PCBDDCChange_ctx change_ctx; 2770 2771 if (!pcbddc->new_global_mat) { 2772 PetscInt global_size,local_size; 2773 2774 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2775 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2776 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2777 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2778 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2779 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2780 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2781 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2782 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2783 } else { 2784 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2785 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2786 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2787 } 2788 if (!pcbddc->user_ChangeOfBasisMatrix) { 2789 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2790 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2791 } else { 2792 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2793 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2794 } 2795 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2796 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2797 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 } 2800 2801 /* get indices in local ordering for vertices and constraints */ 2802 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2803 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2804 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2805 } 2806 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2807 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2808 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2809 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2810 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2811 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2812 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2813 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2814 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2815 /* set quantities in PCBDDC data struct */ 2816 pcbddc->n_actual_vertices = i; 2817 /* check if a new primal space has been introduced */ 2818 pcbddc->new_primal_space_local = PETSC_TRUE; 2819 if (olocal_primal_size == pcbddc->local_primal_size) { 2820 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2821 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2822 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2823 } 2824 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2825 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2826 2827 /* flush dbg viewer */ 2828 if (pcbddc->dbg_flag) { 2829 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2830 } 2831 2832 /* free workspace */ 2833 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2834 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2835 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2836 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2837 if (!pcbddc->adaptive_selection) { 2838 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2839 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2840 } else { 2841 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2842 pcbddc->adaptive_constraints_ptrs, 2843 pcbddc->adaptive_constraints_idxs, 2844 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2845 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2846 } 2847 PetscFunctionReturn(0); 2848 } 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2852 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2853 { 2854 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2855 PC_IS *pcis = (PC_IS*)pc->data; 2856 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2857 PetscInt ierr,i,vertex_size; 2858 PetscViewer viewer=pcbddc->dbg_viewer; 2859 2860 PetscFunctionBegin; 2861 /* Reset previously computed graph */ 2862 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2863 /* Init local Graph struct */ 2864 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2865 2866 /* Check validity of the csr graph passed in by the user */ 2867 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2868 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2869 } 2870 2871 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2872 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2873 PetscInt *xadj,*adjncy; 2874 PetscInt nvtxs; 2875 PetscBool flg_row=PETSC_FALSE; 2876 2877 if (pcbddc->use_local_adj) { 2878 2879 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2880 if (flg_row) { 2881 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2882 pcbddc->computed_rowadj = PETSC_TRUE; 2883 } 2884 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2885 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2886 IS is_dummy; 2887 ISLocalToGlobalMapping l2gmap_dummy; 2888 PetscInt j,sum; 2889 PetscInt *cxadj,*cadjncy; 2890 const PetscInt *idxs; 2891 PCBDDCGraph graph; 2892 PetscBT is_on_boundary; 2893 2894 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2895 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2896 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2897 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2898 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2899 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2900 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2901 if (flg_row) { 2902 graph->xadj = xadj; 2903 graph->adjncy = adjncy; 2904 } 2905 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2906 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2907 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2908 2909 if (pcbddc->dbg_flag) { 2910 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2911 for (i=0;i<graph->ncc;i++) { 2912 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2913 } 2914 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2915 } 2916 2917 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2918 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2919 for (i=0;i<pcis->n_B;i++) { 2920 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2921 } 2922 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2923 2924 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2925 sum = 0; 2926 for (i=0;i<graph->ncc;i++) { 2927 PetscInt sizecc = 0; 2928 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2929 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2930 sizecc++; 2931 } 2932 } 2933 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2934 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2935 cxadj[graph->queue[j]] = sizecc; 2936 } 2937 } 2938 sum += sizecc*sizecc; 2939 } 2940 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2941 sum = 0; 2942 for (i=0;i<pcis->n;i++) { 2943 PetscInt temp = cxadj[i]; 2944 cxadj[i] = sum; 2945 sum += temp; 2946 } 2947 cxadj[pcis->n] = sum; 2948 for (i=0;i<graph->ncc;i++) { 2949 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2950 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2951 PetscInt k,sizecc = 0; 2952 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2953 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2954 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2955 sizecc++; 2956 } 2957 } 2958 } 2959 } 2960 } 2961 if (pcis->n) { 2962 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2963 } else { 2964 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2965 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2966 } 2967 graph->xadj = 0; 2968 graph->adjncy = 0; 2969 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2970 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2971 } 2972 } 2973 2974 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2975 vertex_size = 1; 2976 if (pcbddc->user_provided_isfordofs) { 2977 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2978 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2979 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2980 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2981 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2982 } 2983 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2984 pcbddc->n_ISForDofs = 0; 2985 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2986 } 2987 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2988 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2989 } else { 2990 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2991 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2992 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2993 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2994 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2995 } 2996 } 2997 } 2998 2999 /* Setup of Graph */ 3000 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3001 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3002 } 3003 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3004 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3005 } 3006 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 3007 3008 /* Graph's connected components analysis */ 3009 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3010 3011 /* print some info to stdout */ 3012 if (pcbddc->dbg_flag) { 3013 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 3014 } 3015 3016 /* mark topography has done */ 3017 pcbddc->recompute_topography = PETSC_FALSE; 3018 PetscFunctionReturn(0); 3019 } 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3023 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3024 { 3025 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3026 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 n = 0; 3031 vertices = 0; 3032 if (pcbddc->ConstraintMatrix) { 3033 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3034 for (i=0;i<local_primal_size;i++) { 3035 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3036 if (size_of_constraint == 1) n++; 3037 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3038 } 3039 if (vertices_idx) { 3040 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3041 n = 0; 3042 for (i=0;i<local_primal_size;i++) { 3043 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3044 if (size_of_constraint == 1) { 3045 vertices[n++]=row_cmat_indices[0]; 3046 } 3047 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3048 } 3049 } 3050 } 3051 *n_vertices = n; 3052 if (vertices_idx) *vertices_idx = vertices; 3053 PetscFunctionReturn(0); 3054 } 3055 3056 #undef __FUNCT__ 3057 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3058 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3059 { 3060 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3061 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3062 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3063 PetscBT touched; 3064 PetscErrorCode ierr; 3065 3066 /* This function assumes that the number of local constraints per connected component 3067 is not greater than the number of nodes defined for the connected component 3068 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3069 PetscFunctionBegin; 3070 n = 0; 3071 constraints_index = 0; 3072 if (pcbddc->ConstraintMatrix) { 3073 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3074 max_size_of_constraint = 0; 3075 for (i=0;i<local_primal_size;i++) { 3076 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3077 if (size_of_constraint > 1) { 3078 n++; 3079 } 3080 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3081 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3082 } 3083 if (constraints_idx) { 3084 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3085 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3086 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3087 n = 0; 3088 for (i=0;i<local_primal_size;i++) { 3089 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3090 if (size_of_constraint > 1) { 3091 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3092 /* find first untouched local node */ 3093 j = 0; 3094 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3095 min_index = row_cmat_global_indices[j]; 3096 min_loc = j; 3097 /* search the minimum among nodes not yet touched on the connected component 3098 since there can be more than one constraint on a single cc */ 3099 for (j=1;j<size_of_constraint;j++) { 3100 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3101 min_index = row_cmat_global_indices[j]; 3102 min_loc = j; 3103 } 3104 } 3105 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3106 constraints_index[n++] = row_cmat_indices[min_loc]; 3107 } 3108 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3109 } 3110 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3111 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3112 } 3113 } 3114 *n_constraints = n; 3115 if (constraints_idx) *constraints_idx = constraints_index; 3116 PetscFunctionReturn(0); 3117 } 3118 3119 #undef __FUNCT__ 3120 #define __FUNCT__ "PCBDDCSubsetNumbering" 3121 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3122 { 3123 Vec local_vec,global_vec; 3124 IS seqis,paris; 3125 VecScatter scatter_ctx; 3126 PetscScalar *array; 3127 PetscInt *temp_global_dofs; 3128 PetscScalar globalsum; 3129 PetscInt i,j,s; 3130 PetscInt nlocals,first_index,old_index,max_local; 3131 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3132 PetscMPIInt *dof_sizes,*dof_displs; 3133 PetscBool first_found; 3134 PetscErrorCode ierr; 3135 3136 PetscFunctionBegin; 3137 /* mpi buffers */ 3138 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3139 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3140 j = ( !rank_prec_comm ? size_prec_comm : 0); 3141 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3142 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3143 /* get maximum size of subset */ 3144 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3145 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3146 max_local = 0; 3147 for (i=0;i<n_local_dofs;i++) { 3148 if (max_local < temp_global_dofs[i] ) { 3149 max_local = temp_global_dofs[i]; 3150 } 3151 } 3152 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3153 max_global++; 3154 max_local = 0; 3155 for (i=0;i<n_local_dofs;i++) { 3156 if (max_local < local_dofs[i] ) { 3157 max_local = local_dofs[i]; 3158 } 3159 } 3160 max_local++; 3161 /* allocate workspace */ 3162 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3163 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3164 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3165 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3166 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3167 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3168 /* create scatter */ 3169 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3170 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3171 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3172 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3173 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3174 /* init array */ 3175 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3176 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3177 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3178 if (local_dofs_mult) { 3179 for (i=0;i<n_local_dofs;i++) { 3180 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3181 } 3182 } else { 3183 for (i=0;i<n_local_dofs;i++) { 3184 array[local_dofs[i]]=1.0; 3185 } 3186 } 3187 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3188 /* scatter into global vec and get total number of global dofs */ 3189 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3190 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3191 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3192 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3193 /* Fill global_vec with cumulative function for global numbering */ 3194 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3195 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3196 nlocals = 0; 3197 first_index = -1; 3198 first_found = PETSC_FALSE; 3199 for (i=0;i<s;i++) { 3200 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3201 first_found = PETSC_TRUE; 3202 first_index = i; 3203 } 3204 nlocals += (PetscInt)PetscRealPart(array[i]); 3205 } 3206 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3207 if (!rank_prec_comm) { 3208 dof_displs[0]=0; 3209 for (i=1;i<size_prec_comm;i++) { 3210 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3211 } 3212 } 3213 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3214 if (first_found) { 3215 array[first_index] += (PetscScalar)nlocals; 3216 old_index = first_index; 3217 for (i=first_index+1;i<s;i++) { 3218 if (PetscRealPart(array[i]) > 0.1) { 3219 array[i] += array[old_index]; 3220 old_index = i; 3221 } 3222 } 3223 } 3224 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3225 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3226 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3227 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3228 /* get global ordering of local dofs */ 3229 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3230 if (local_dofs_mult) { 3231 for (i=0;i<n_local_dofs;i++) { 3232 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3233 } 3234 } else { 3235 for (i=0;i<n_local_dofs;i++) { 3236 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3237 } 3238 } 3239 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3240 /* free workspace */ 3241 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3242 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3243 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3244 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3245 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3246 /* return pointer to global ordering of local dofs */ 3247 *global_numbering_subset = temp_global_dofs; 3248 PetscFunctionReturn(0); 3249 } 3250 3251 #undef __FUNCT__ 3252 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3253 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3254 { 3255 PetscInt i,j; 3256 PetscScalar *alphas; 3257 PetscErrorCode ierr; 3258 3259 PetscFunctionBegin; 3260 /* this implements stabilized Gram-Schmidt */ 3261 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3262 for (i=0;i<n;i++) { 3263 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3264 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3265 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3266 } 3267 ierr = PetscFree(alphas);CHKERRQ(ierr); 3268 PetscFunctionReturn(0); 3269 } 3270 3271 #undef __FUNCT__ 3272 #define __FUNCT__ "MatISGetSubassemblingPattern" 3273 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3274 { 3275 Mat subdomain_adj; 3276 IS new_ranks,ranks_send_to; 3277 MatPartitioning partitioner; 3278 Mat_IS *matis; 3279 PetscInt n_neighs,*neighs,*n_shared,**shared; 3280 PetscInt prank; 3281 PetscMPIInt size,rank,color; 3282 PetscInt *xadj,*adjncy,*oldranks; 3283 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3284 PetscInt i,local_size,threshold=0; 3285 PetscErrorCode ierr; 3286 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3287 PetscSubcomm subcomm; 3288 3289 PetscFunctionBegin; 3290 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3291 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3292 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3293 3294 /* Get info on mapping */ 3295 matis = (Mat_IS*)(mat->data); 3296 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3297 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3298 3299 /* build local CSR graph of subdomains' connectivity */ 3300 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3301 xadj[0] = 0; 3302 xadj[1] = PetscMax(n_neighs-1,0); 3303 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3304 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3305 3306 if (threshold) { 3307 PetscInt xadj_count = 0; 3308 for (i=1;i<n_neighs;i++) { 3309 if (n_shared[i] > threshold) { 3310 adjncy[xadj_count] = neighs[i]; 3311 adjncy_wgt[xadj_count] = n_shared[i]; 3312 xadj_count++; 3313 } 3314 } 3315 xadj[1] = xadj_count; 3316 } else { 3317 if (xadj[1]) { 3318 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3319 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3320 } 3321 } 3322 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3323 if (use_square) { 3324 for (i=0;i<xadj[1];i++) { 3325 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3326 } 3327 } 3328 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3329 3330 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3331 3332 /* 3333 Restrict work on active processes only. 3334 */ 3335 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3336 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3337 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3338 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3339 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3340 if (color) { 3341 ierr = PetscFree(xadj);CHKERRQ(ierr); 3342 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3343 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3344 } else { 3345 PetscInt coarsening_ratio; 3346 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3347 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3348 prank = rank; 3349 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3350 /* 3351 for (i=0;i<size;i++) { 3352 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3353 } 3354 */ 3355 for (i=0;i<xadj[1];i++) { 3356 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3357 } 3358 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3359 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3360 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3361 3362 /* Partition */ 3363 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3364 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3365 if (use_vwgt) { 3366 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3367 v_wgt[0] = local_size; 3368 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3369 } 3370 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3371 coarsening_ratio = size/n_subdomains; 3372 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3373 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3374 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3375 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3376 3377 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3378 if (contiguous) { 3379 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3380 } else { 3381 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3382 } 3383 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3384 /* clean up */ 3385 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3386 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3387 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3388 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3389 } 3390 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3391 3392 /* assemble parallel IS for sends */ 3393 i = 1; 3394 if (color) i=0; 3395 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3396 3397 /* get back IS */ 3398 *is_sends = ranks_send_to; 3399 PetscFunctionReturn(0); 3400 } 3401 3402 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3403 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "MatISSubassemble" 3406 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3407 { 3408 Mat local_mat; 3409 Mat_IS *matis; 3410 IS is_sends_internal; 3411 PetscInt rows,cols,new_local_rows; 3412 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3413 PetscBool ismatis,isdense,newisdense,destroy_mat; 3414 ISLocalToGlobalMapping l2gmap; 3415 PetscInt* l2gmap_indices; 3416 const PetscInt* is_indices; 3417 MatType new_local_type; 3418 /* buffers */ 3419 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3420 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3421 PetscInt *recv_buffer_idxs_local; 3422 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3423 /* MPI */ 3424 MPI_Comm comm,comm_n; 3425 PetscSubcomm subcomm; 3426 PetscMPIInt n_sends,n_recvs,commsize; 3427 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3428 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3429 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3430 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3431 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3432 PetscErrorCode ierr; 3433 3434 PetscFunctionBegin; 3435 /* TODO: add missing checks */ 3436 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3437 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3438 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3439 PetscValidLogicalCollectiveInt(mat,nis,7); 3440 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3441 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3442 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3443 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3444 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3445 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3446 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3447 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3448 PetscInt mrows,mcols,mnrows,mncols; 3449 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3450 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3451 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3452 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3453 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3454 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3455 } 3456 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3457 PetscValidLogicalCollectiveInt(mat,bs,0); 3458 /* prepare IS for sending if not provided */ 3459 if (!is_sends) { 3460 PetscBool pcontig = PETSC_TRUE; 3461 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3462 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3463 } else { 3464 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3465 is_sends_internal = is_sends; 3466 } 3467 3468 /* get pointer of MATIS data */ 3469 matis = (Mat_IS*)mat->data; 3470 3471 /* get comm */ 3472 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3473 3474 /* compute number of sends */ 3475 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3476 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3477 3478 /* compute number of receives */ 3479 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3480 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3481 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3482 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3483 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3484 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3485 ierr = PetscFree(iflags);CHKERRQ(ierr); 3486 3487 /* restrict comm if requested */ 3488 subcomm = 0; 3489 destroy_mat = PETSC_FALSE; 3490 if (restrict_comm) { 3491 PetscMPIInt color,subcommsize; 3492 3493 color = 0; 3494 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3495 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3496 subcommsize = commsize - subcommsize; 3497 /* check if reuse has been requested */ 3498 if (reuse == MAT_REUSE_MATRIX) { 3499 if (*mat_n) { 3500 PetscMPIInt subcommsize2; 3501 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3502 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3503 comm_n = PetscObjectComm((PetscObject)*mat_n); 3504 } else { 3505 comm_n = PETSC_COMM_SELF; 3506 } 3507 } else { /* MAT_INITIAL_MATRIX */ 3508 PetscMPIInt rank; 3509 3510 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3511 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3512 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3513 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3514 comm_n = PetscSubcommChild(subcomm); 3515 } 3516 /* flag to destroy *mat_n if not significative */ 3517 if (color) destroy_mat = PETSC_TRUE; 3518 } else { 3519 comm_n = comm; 3520 } 3521 3522 /* prepare send/receive buffers */ 3523 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3524 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3525 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3526 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3527 if (nis) { 3528 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3529 } 3530 3531 /* Get data from local matrices */ 3532 if (!isdense) { 3533 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3534 /* TODO: See below some guidelines on how to prepare the local buffers */ 3535 /* 3536 send_buffer_vals should contain the raw values of the local matrix 3537 send_buffer_idxs should contain: 3538 - MatType_PRIVATE type 3539 - PetscInt size_of_l2gmap 3540 - PetscInt global_row_indices[size_of_l2gmap] 3541 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3542 */ 3543 } else { 3544 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3545 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3546 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3547 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3548 send_buffer_idxs[1] = i; 3549 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3550 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3551 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3552 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3553 for (i=0;i<n_sends;i++) { 3554 ilengths_vals[is_indices[i]] = len*len; 3555 ilengths_idxs[is_indices[i]] = len+2; 3556 } 3557 } 3558 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3559 /* additional is (if any) */ 3560 if (nis) { 3561 PetscMPIInt psum; 3562 PetscInt j; 3563 for (j=0,psum=0;j<nis;j++) { 3564 PetscInt plen; 3565 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3566 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3567 psum += len+1; /* indices + lenght */ 3568 } 3569 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3570 for (j=0,psum=0;j<nis;j++) { 3571 PetscInt plen; 3572 const PetscInt *is_array_idxs; 3573 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3574 send_buffer_idxs_is[psum] = plen; 3575 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3576 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3577 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3578 psum += plen+1; /* indices + lenght */ 3579 } 3580 for (i=0;i<n_sends;i++) { 3581 ilengths_idxs_is[is_indices[i]] = psum; 3582 } 3583 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3584 } 3585 3586 buf_size_idxs = 0; 3587 buf_size_vals = 0; 3588 buf_size_idxs_is = 0; 3589 for (i=0;i<n_recvs;i++) { 3590 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3591 buf_size_vals += (PetscInt)olengths_vals[i]; 3592 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3593 } 3594 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3595 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3596 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3597 3598 /* get new tags for clean communications */ 3599 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3600 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3601 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3602 3603 /* allocate for requests */ 3604 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3605 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3606 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3607 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3608 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3609 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3610 3611 /* communications */ 3612 ptr_idxs = recv_buffer_idxs; 3613 ptr_vals = recv_buffer_vals; 3614 ptr_idxs_is = recv_buffer_idxs_is; 3615 for (i=0;i<n_recvs;i++) { 3616 source_dest = onodes[i]; 3617 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3618 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3619 ptr_idxs += olengths_idxs[i]; 3620 ptr_vals += olengths_vals[i]; 3621 if (nis) { 3622 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3623 ptr_idxs_is += olengths_idxs_is[i]; 3624 } 3625 } 3626 for (i=0;i<n_sends;i++) { 3627 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3628 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3629 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3630 if (nis) { 3631 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3632 } 3633 } 3634 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3635 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3636 3637 /* assemble new l2g map */ 3638 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3639 ptr_idxs = recv_buffer_idxs; 3640 new_local_rows = 0; 3641 for (i=0;i<n_recvs;i++) { 3642 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3643 ptr_idxs += olengths_idxs[i]; 3644 } 3645 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3646 ptr_idxs = recv_buffer_idxs; 3647 new_local_rows = 0; 3648 for (i=0;i<n_recvs;i++) { 3649 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3650 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3651 ptr_idxs += olengths_idxs[i]; 3652 } 3653 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3654 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3655 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3656 3657 /* infer new local matrix type from received local matrices type */ 3658 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3659 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3660 if (n_recvs) { 3661 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3662 ptr_idxs = recv_buffer_idxs; 3663 for (i=0;i<n_recvs;i++) { 3664 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3665 new_local_type_private = MATAIJ_PRIVATE; 3666 break; 3667 } 3668 ptr_idxs += olengths_idxs[i]; 3669 } 3670 switch (new_local_type_private) { 3671 case MATDENSE_PRIVATE: 3672 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3673 new_local_type = MATSEQAIJ; 3674 bs = 1; 3675 } else { /* if I receive only 1 dense matrix */ 3676 new_local_type = MATSEQDENSE; 3677 bs = 1; 3678 } 3679 break; 3680 case MATAIJ_PRIVATE: 3681 new_local_type = MATSEQAIJ; 3682 bs = 1; 3683 break; 3684 case MATBAIJ_PRIVATE: 3685 new_local_type = MATSEQBAIJ; 3686 break; 3687 case MATSBAIJ_PRIVATE: 3688 new_local_type = MATSEQSBAIJ; 3689 break; 3690 default: 3691 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3692 break; 3693 } 3694 } else { /* by default, new_local_type is seqdense */ 3695 new_local_type = MATSEQDENSE; 3696 bs = 1; 3697 } 3698 3699 /* create MATIS object if needed */ 3700 if (reuse == MAT_INITIAL_MATRIX) { 3701 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3702 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3703 } else { 3704 /* it also destroys the local matrices */ 3705 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3706 } 3707 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3708 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3709 3710 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3711 3712 /* Global to local map of received indices */ 3713 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3714 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3715 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3716 3717 /* restore attributes -> type of incoming data and its size */ 3718 buf_size_idxs = 0; 3719 for (i=0;i<n_recvs;i++) { 3720 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3721 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3722 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3723 } 3724 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3725 3726 /* set preallocation */ 3727 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3728 if (!newisdense) { 3729 PetscInt *new_local_nnz=0; 3730 3731 ptr_vals = recv_buffer_vals; 3732 ptr_idxs = recv_buffer_idxs_local; 3733 if (n_recvs) { 3734 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3735 } 3736 for (i=0;i<n_recvs;i++) { 3737 PetscInt j; 3738 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3739 for (j=0;j<*(ptr_idxs+1);j++) { 3740 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3741 } 3742 } else { 3743 /* TODO */ 3744 } 3745 ptr_idxs += olengths_idxs[i]; 3746 } 3747 if (new_local_nnz) { 3748 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3749 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3750 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3751 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3752 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3753 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3754 } else { 3755 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3756 } 3757 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3758 } else { 3759 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3760 } 3761 3762 /* set values */ 3763 ptr_vals = recv_buffer_vals; 3764 ptr_idxs = recv_buffer_idxs_local; 3765 for (i=0;i<n_recvs;i++) { 3766 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3767 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3768 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3769 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3770 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3771 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3772 } else { 3773 /* TODO */ 3774 } 3775 ptr_idxs += olengths_idxs[i]; 3776 ptr_vals += olengths_vals[i]; 3777 } 3778 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3779 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3780 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3781 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3782 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3783 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3784 3785 #if 0 3786 if (!restrict_comm) { /* check */ 3787 Vec lvec,rvec; 3788 PetscReal infty_error; 3789 3790 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3791 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3792 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3793 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3794 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3795 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3796 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3797 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3798 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3799 } 3800 #endif 3801 3802 /* assemble new additional is (if any) */ 3803 if (nis) { 3804 PetscInt **temp_idxs,*count_is,j,psum; 3805 3806 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3807 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3808 ptr_idxs = recv_buffer_idxs_is; 3809 psum = 0; 3810 for (i=0;i<n_recvs;i++) { 3811 for (j=0;j<nis;j++) { 3812 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3813 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3814 psum += plen; 3815 ptr_idxs += plen+1; /* shift pointer to received data */ 3816 } 3817 } 3818 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3819 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3820 for (i=1;i<nis;i++) { 3821 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3822 } 3823 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3824 ptr_idxs = recv_buffer_idxs_is; 3825 for (i=0;i<n_recvs;i++) { 3826 for (j=0;j<nis;j++) { 3827 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3828 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3829 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3830 ptr_idxs += plen+1; /* shift pointer to received data */ 3831 } 3832 } 3833 for (i=0;i<nis;i++) { 3834 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3835 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3836 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3837 } 3838 ierr = PetscFree(count_is);CHKERRQ(ierr); 3839 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3840 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3841 } 3842 /* free workspace */ 3843 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3844 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3845 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3846 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3847 if (isdense) { 3848 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3849 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3850 } else { 3851 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3852 } 3853 if (nis) { 3854 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3855 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3856 } 3857 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3858 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3859 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3860 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3861 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3862 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3863 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3864 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3865 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3866 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3867 ierr = PetscFree(onodes);CHKERRQ(ierr); 3868 if (nis) { 3869 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3870 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3871 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3872 } 3873 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3874 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3875 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3876 for (i=0;i<nis;i++) { 3877 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3878 } 3879 } 3880 PetscFunctionReturn(0); 3881 } 3882 3883 /* temporary hack into ksp private data structure */ 3884 #include <petsc-private/kspimpl.h> 3885 3886 #undef __FUNCT__ 3887 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3888 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3889 { 3890 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3891 PC_IS *pcis = (PC_IS*)pc->data; 3892 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3893 MatNullSpace CoarseNullSpace=NULL; 3894 ISLocalToGlobalMapping coarse_islg; 3895 IS coarse_is,*isarray; 3896 PetscInt i,im_active=-1,active_procs=-1; 3897 PetscInt nis,nisdofs,nisneu; 3898 PC pc_temp; 3899 PCType coarse_pc_type; 3900 KSPType coarse_ksp_type; 3901 PetscBool multilevel_requested,multilevel_allowed; 3902 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3903 Mat t_coarse_mat_is; 3904 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3905 PetscMPIInt all_procs; 3906 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3907 PetscBool compute_vecs = PETSC_FALSE; 3908 PetscScalar *array; 3909 PetscErrorCode ierr; 3910 3911 PetscFunctionBegin; 3912 /* Assign global numbering to coarse dofs */ 3913 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3914 PetscInt ocoarse_size; 3915 compute_vecs = PETSC_TRUE; 3916 ocoarse_size = pcbddc->coarse_size; 3917 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3918 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3919 /* see if we can avoid some work */ 3920 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3921 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3922 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3923 coarse_reuse = PETSC_FALSE; 3924 } else { /* we can safely reuse already computed coarse matrix */ 3925 coarse_reuse = PETSC_TRUE; 3926 } 3927 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3928 coarse_reuse = PETSC_FALSE; 3929 } 3930 /* reset any subassembling information */ 3931 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3932 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3933 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3934 coarse_reuse = PETSC_TRUE; 3935 } 3936 3937 /* count "active" (i.e. with positive local size) and "void" processes */ 3938 im_active = !!(pcis->n); 3939 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3940 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3941 void_procs = all_procs-active_procs; 3942 csin_type_simple = PETSC_TRUE; 3943 redist = PETSC_FALSE; 3944 if (pcbddc->current_level && void_procs) { 3945 csin_ml = PETSC_TRUE; 3946 ncoarse_ml = void_procs; 3947 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3948 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3949 csin_ds = PETSC_TRUE; 3950 ncoarse_ds = pcbddc->redistribute_coarse; 3951 redist = PETSC_TRUE; 3952 } else { 3953 csin_ds = PETSC_TRUE; 3954 ncoarse_ds = active_procs; 3955 redist = PETSC_TRUE; 3956 } 3957 } else { 3958 csin_ml = PETSC_FALSE; 3959 ncoarse_ml = all_procs; 3960 if (void_procs) { 3961 csin_ds = PETSC_TRUE; 3962 ncoarse_ds = void_procs; 3963 csin_type_simple = PETSC_FALSE; 3964 } else { 3965 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3966 csin_ds = PETSC_TRUE; 3967 ncoarse_ds = pcbddc->redistribute_coarse; 3968 redist = PETSC_TRUE; 3969 } else { 3970 csin_ds = PETSC_FALSE; 3971 ncoarse_ds = all_procs; 3972 } 3973 } 3974 } 3975 3976 /* 3977 test if we can go multilevel: three conditions must be satisfied: 3978 - we have not exceeded the number of levels requested 3979 - we can actually subassemble the active processes 3980 - we can find a suitable number of MPI processes where we can place the subassembled problem 3981 */ 3982 multilevel_allowed = PETSC_FALSE; 3983 multilevel_requested = PETSC_FALSE; 3984 if (pcbddc->current_level < pcbddc->max_levels) { 3985 multilevel_requested = PETSC_TRUE; 3986 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3987 multilevel_allowed = PETSC_FALSE; 3988 } else { 3989 multilevel_allowed = PETSC_TRUE; 3990 } 3991 } 3992 /* determine number of process partecipating to coarse solver */ 3993 if (multilevel_allowed) { 3994 ncoarse = ncoarse_ml; 3995 csin = csin_ml; 3996 redist = PETSC_FALSE; 3997 } else { 3998 ncoarse = ncoarse_ds; 3999 csin = csin_ds; 4000 } 4001 4002 /* creates temporary l2gmap and IS for coarse indexes */ 4003 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4004 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4005 4006 /* creates temporary MATIS object for coarse matrix */ 4007 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4008 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4009 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4010 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4011 #if 0 4012 { 4013 PetscViewer viewer; 4014 char filename[256]; 4015 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4016 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4017 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4018 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4019 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4020 } 4021 #endif 4022 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4023 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4024 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4025 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4026 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4027 4028 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4029 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4030 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4031 const PetscInt *idxs; 4032 ISLocalToGlobalMapping tmap; 4033 4034 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4035 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4036 /* allocate space for temporary storage */ 4037 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4038 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4039 /* allocate for IS array */ 4040 nisdofs = pcbddc->n_ISForDofsLocal; 4041 nisneu = !!pcbddc->NeumannBoundariesLocal; 4042 nis = nisdofs + nisneu; 4043 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4044 /* dofs splitting */ 4045 for (i=0;i<nisdofs;i++) { 4046 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4047 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4048 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4049 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4050 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4051 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4052 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4053 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4054 } 4055 /* neumann boundaries */ 4056 if (pcbddc->NeumannBoundariesLocal) { 4057 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4058 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4059 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4060 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4061 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4062 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4063 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4064 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4065 } 4066 /* free memory */ 4067 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4068 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4069 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4070 } else { 4071 nis = 0; 4072 nisdofs = 0; 4073 nisneu = 0; 4074 isarray = NULL; 4075 } 4076 /* destroy no longer needed map */ 4077 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4078 4079 /* restrict on coarse candidates (if needed) */ 4080 coarse_mat_is = NULL; 4081 if (csin) { 4082 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4083 if (redist) { 4084 PetscMPIInt rank; 4085 PetscInt spc,n_spc_p1,dest[1],destsize; 4086 4087 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4088 spc = active_procs/ncoarse; 4089 n_spc_p1 = active_procs%ncoarse; 4090 if (im_active) { 4091 destsize = 1; 4092 if (rank > n_spc_p1*(spc+1)-1) { 4093 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4094 } else { 4095 dest[0] = rank/(spc+1); 4096 } 4097 } else { 4098 destsize = 0; 4099 } 4100 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4101 } else if (csin_type_simple) { 4102 PetscMPIInt rank; 4103 PetscInt issize,isidx; 4104 4105 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4106 if (im_active) { 4107 issize = 1; 4108 isidx = (PetscInt)rank; 4109 } else { 4110 issize = 0; 4111 isidx = -1; 4112 } 4113 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4114 } else { /* get a suitable subassembling pattern from MATIS code */ 4115 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4116 } 4117 4118 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4119 if (!redist || ncoarse <= void_procs) { 4120 PetscInt ncoarse_cand,tissize,*nisindices; 4121 PetscInt *coarse_candidates; 4122 const PetscInt* tisindices; 4123 4124 /* get coarse candidates' ranks in pc communicator */ 4125 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4126 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4127 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4128 if (!coarse_candidates[i]) { 4129 coarse_candidates[ncoarse_cand++]=i; 4130 } 4131 } 4132 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4133 4134 4135 if (pcbddc->dbg_flag) { 4136 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4138 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4139 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4140 for (i=0;i<ncoarse_cand;i++) { 4141 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4142 } 4143 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4144 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4145 } 4146 /* shift the pattern on coarse candidates */ 4147 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4148 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4149 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4150 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4151 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4152 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4153 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4154 } 4155 if (pcbddc->dbg_flag) { 4156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4158 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4160 } 4161 } 4162 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4163 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4164 } else { 4165 if (pcbddc->dbg_flag) { 4166 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4169 } 4170 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4171 coarse_mat_is = t_coarse_mat_is; 4172 } 4173 4174 /* create local to global scatters for coarse problem */ 4175 if (compute_vecs) { 4176 PetscInt lrows; 4177 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4178 if (coarse_mat_is) { 4179 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4180 } else { 4181 lrows = 0; 4182 } 4183 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4184 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4185 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4186 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4187 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4188 } 4189 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4190 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4191 4192 /* set defaults for coarse KSP and PC */ 4193 if (multilevel_allowed) { 4194 coarse_ksp_type = KSPRICHARDSON; 4195 coarse_pc_type = PCBDDC; 4196 } else { 4197 coarse_ksp_type = KSPPREONLY; 4198 coarse_pc_type = PCREDUNDANT; 4199 } 4200 4201 /* print some info if requested */ 4202 if (pcbddc->dbg_flag) { 4203 if (!multilevel_allowed) { 4204 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4205 if (multilevel_requested) { 4206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4207 } else if (pcbddc->max_levels) { 4208 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4209 } 4210 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4211 } 4212 } 4213 4214 /* create the coarse KSP object only once with defaults */ 4215 if (coarse_mat_is) { 4216 MatReuse coarse_mat_reuse; 4217 PetscViewer dbg_viewer = NULL; 4218 if (pcbddc->dbg_flag) { 4219 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4220 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4221 } 4222 if (!pcbddc->coarse_ksp) { 4223 char prefix[256],str_level[16]; 4224 size_t len; 4225 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4226 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4227 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4228 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4229 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4230 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4231 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4232 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4233 /* prefix */ 4234 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4235 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4236 if (!pcbddc->current_level) { 4237 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4238 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4239 } else { 4240 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4241 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4242 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4243 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4244 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4245 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4246 } 4247 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4248 /* allow user customization */ 4249 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4250 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4251 } 4252 4253 /* get some info after set from options */ 4254 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4255 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4256 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4257 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4258 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4259 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4260 isbddc = PETSC_FALSE; 4261 } 4262 if (isredundant) { 4263 KSP inner_ksp; 4264 PC inner_pc; 4265 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4266 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4267 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4268 } 4269 4270 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4271 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4272 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4273 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4274 if (nisdofs) { 4275 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4276 for (i=0;i<nisdofs;i++) { 4277 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4278 } 4279 } 4280 if (nisneu) { 4281 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4282 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4283 } 4284 4285 /* assemble coarse matrix */ 4286 if (coarse_reuse) { 4287 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4288 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4289 coarse_mat_reuse = MAT_REUSE_MATRIX; 4290 } else { 4291 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4292 } 4293 if (isbddc || isnn) { 4294 if (pcbddc->coarsening_ratio > 1) { 4295 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4296 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4297 if (pcbddc->dbg_flag) { 4298 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4299 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4300 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4301 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4302 } 4303 } 4304 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4305 } else { 4306 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4307 coarse_mat = coarse_mat_is; 4308 } 4309 } else { 4310 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4311 } 4312 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4313 4314 /* propagate symmetry info to coarse matrix */ 4315 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4316 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4317 4318 /* set operators */ 4319 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4320 if (pcbddc->dbg_flag) { 4321 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4322 } 4323 } else { /* processes non partecipating to coarse solver (if any) */ 4324 coarse_mat = 0; 4325 } 4326 ierr = PetscFree(isarray);CHKERRQ(ierr); 4327 #if 0 4328 { 4329 PetscViewer viewer; 4330 char filename[256]; 4331 sprintf(filename,"coarse_mat.m"); 4332 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4333 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4334 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4335 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4336 } 4337 #endif 4338 4339 /* Compute coarse null space (special handling by BDDC only) */ 4340 if (pcbddc->NullSpace) { 4341 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4342 } 4343 4344 if (pcbddc->coarse_ksp) { 4345 Vec crhs,csol; 4346 PetscBool ispreonly; 4347 if (CoarseNullSpace) { 4348 if (isbddc) { 4349 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4350 } else { 4351 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4352 } 4353 } 4354 /* setup coarse ksp */ 4355 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4356 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4357 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4358 /* hack */ 4359 if (!csol) { 4360 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4361 } 4362 if (!crhs) { 4363 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4364 } 4365 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4366 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4367 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4368 KSP check_ksp; 4369 KSPType check_ksp_type; 4370 PC check_pc; 4371 Vec check_vec,coarse_vec; 4372 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4373 PetscInt its; 4374 PetscBool compute_eigs; 4375 PetscReal *eigs_r,*eigs_c; 4376 PetscInt neigs; 4377 const char *prefix; 4378 4379 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4380 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4381 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4382 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4383 if (ispreonly) { 4384 check_ksp_type = KSPPREONLY; 4385 compute_eigs = PETSC_FALSE; 4386 } else { 4387 check_ksp_type = KSPGMRES; 4388 compute_eigs = PETSC_TRUE; 4389 } 4390 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4391 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4392 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4393 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4394 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4395 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4396 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4397 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4398 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4399 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4400 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4401 /* create random vec */ 4402 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4403 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4404 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4405 if (CoarseNullSpace) { 4406 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4407 } 4408 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4409 /* solve coarse problem */ 4410 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4411 if (CoarseNullSpace) { 4412 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4413 } 4414 /* set eigenvalue estimation if preonly has not been requested */ 4415 if (compute_eigs) { 4416 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4417 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4418 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4419 lambda_max = eigs_r[neigs-1]; 4420 lambda_min = eigs_r[0]; 4421 if (pcbddc->use_coarse_estimates) { 4422 if (lambda_max>lambda_min) { 4423 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4424 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4425 } 4426 } 4427 } 4428 4429 /* check coarse problem residual error */ 4430 if (pcbddc->dbg_flag) { 4431 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4432 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4433 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4434 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4435 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4436 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4437 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4438 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4439 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4440 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4441 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4442 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4443 if (compute_eigs) { 4444 PetscReal lambda_max_s,lambda_min_s; 4445 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4446 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4447 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4448 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4449 for (i=0;i<neigs;i++) { 4450 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4451 } 4452 } 4453 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4454 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4455 } 4456 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4457 if (compute_eigs) { 4458 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4459 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4460 } 4461 } 4462 } 4463 /* print additional info */ 4464 if (pcbddc->dbg_flag) { 4465 /* waits until all processes reaches this point */ 4466 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4467 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4469 } 4470 4471 /* free memory */ 4472 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4473 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4479 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4480 { 4481 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4482 PC_IS* pcis = (PC_IS*)pc->data; 4483 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4484 PetscInt i,coarse_size; 4485 PetscInt *local_primal_indices; 4486 PetscErrorCode ierr; 4487 4488 PetscFunctionBegin; 4489 /* Compute global number of coarse dofs */ 4490 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4491 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4492 } 4493 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4494 4495 /* check numbering */ 4496 if (pcbddc->dbg_flag) { 4497 PetscScalar coarsesum,*array; 4498 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4499 4500 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4501 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4502 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4503 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4504 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4505 for (i=0;i<pcbddc->local_primal_size;i++) { 4506 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4507 } 4508 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4509 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4511 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4512 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4513 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4514 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4515 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4516 for (i=0;i<pcis->n;i++) { 4517 if (array[i] == 1.0) { 4518 set_error = PETSC_TRUE; 4519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4520 } 4521 } 4522 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4524 for (i=0;i<pcis->n;i++) { 4525 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4526 } 4527 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4528 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4529 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4530 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4531 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4532 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4533 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4534 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4535 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4536 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4537 for (i=0;i<pcbddc->local_primal_size;i++) { 4538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4539 } 4540 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4541 } 4542 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4543 if (set_error_reduced) { 4544 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4545 } 4546 } 4547 /* get back data */ 4548 *coarse_size_n = coarse_size; 4549 *local_primal_indices_n = local_primal_indices; 4550 PetscFunctionReturn(0); 4551 } 4552 4553 #undef __FUNCT__ 4554 #define __FUNCT__ "PCBDDCGlobalToLocal" 4555 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4556 { 4557 IS localis_t; 4558 PetscInt i,lsize,*idxs,n; 4559 PetscScalar *vals; 4560 PetscErrorCode ierr; 4561 4562 PetscFunctionBegin; 4563 /* get indices in local ordering exploiting local to global map */ 4564 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4565 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4566 for (i=0;i<lsize;i++) vals[i] = 1.0; 4567 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4568 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4569 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4570 if (idxs) { /* multilevel guard */ 4571 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4572 } 4573 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4574 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4575 ierr = PetscFree(vals);CHKERRQ(ierr); 4576 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4577 /* now compute set in local ordering */ 4578 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4579 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4580 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4581 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4582 for (i=0,lsize=0;i<n;i++) { 4583 if (PetscRealPart(vals[i]) > 0.5) { 4584 lsize++; 4585 } 4586 } 4587 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4588 for (i=0,lsize=0;i<n;i++) { 4589 if (PetscRealPart(vals[i]) > 0.5) { 4590 idxs[lsize++] = i; 4591 } 4592 } 4593 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4594 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4595 *localis = localis_t; 4596 PetscFunctionReturn(0); 4597 } 4598 4599 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4600 #undef __FUNCT__ 4601 #define __FUNCT__ "PCBDDCMatMult_Private" 4602 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4603 { 4604 PCBDDCChange_ctx change_ctx; 4605 PetscErrorCode ierr; 4606 4607 PetscFunctionBegin; 4608 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4609 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4610 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4611 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4612 PetscFunctionReturn(0); 4613 } 4614 4615 #undef __FUNCT__ 4616 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4617 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4618 { 4619 PCBDDCChange_ctx change_ctx; 4620 PetscErrorCode ierr; 4621 4622 PetscFunctionBegin; 4623 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4624 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4625 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4626 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4627 PetscFunctionReturn(0); 4628 } 4629 4630 #undef __FUNCT__ 4631 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4632 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4633 { 4634 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4635 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4636 PetscInt *used_xadj,*used_adjncy; 4637 PetscBool free_used_adj; 4638 PetscErrorCode ierr; 4639 4640 PetscFunctionBegin; 4641 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4642 free_used_adj = PETSC_FALSE; 4643 if (pcbddc->sub_schurs_layers == -1) { 4644 used_xadj = NULL; 4645 used_adjncy = NULL; 4646 } else { 4647 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4648 used_xadj = pcbddc->mat_graph->xadj; 4649 used_adjncy = pcbddc->mat_graph->adjncy; 4650 } else if (pcbddc->computed_rowadj) { 4651 used_xadj = pcbddc->mat_graph->xadj; 4652 used_adjncy = pcbddc->mat_graph->adjncy; 4653 } else { 4654 PetscBool flg_row=PETSC_FALSE; 4655 const PetscInt *xadj,*adjncy; 4656 PetscInt nvtxs; 4657 4658 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4659 if (flg_row) { 4660 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4661 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4662 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4663 free_used_adj = PETSC_TRUE; 4664 } else { 4665 pcbddc->sub_schurs_layers = -1; 4666 used_xadj = NULL; 4667 used_adjncy = NULL; 4668 } 4669 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4670 } 4671 } 4672 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4673 4674 /* free adjacency */ 4675 if (free_used_adj) { 4676 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4677 } 4678 PetscFunctionReturn(0); 4679 } 4680 4681 #undef __FUNCT__ 4682 #define __FUNCT__ "PCBDDCInitSubSchurs" 4683 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4684 { 4685 PC_IS *pcis=(PC_IS*)pc->data; 4686 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4687 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4688 PCBDDCGraph graph; 4689 Mat S_j; 4690 PetscErrorCode ierr; 4691 4692 PetscFunctionBegin; 4693 /* attach interface graph for determining subsets */ 4694 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4695 IS verticesIS; 4696 4697 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4698 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4699 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4700 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4701 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4702 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4703 /* 4704 if (pcbddc->dbg_flag) { 4705 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4706 } 4707 */ 4708 } else { 4709 graph = pcbddc->mat_graph; 4710 } 4711 4712 /* Create Schur complement matrix */ 4713 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4714 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4715 4716 /* sub_schurs init */ 4717 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4718 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4719 /* free graph struct */ 4720 if (pcbddc->sub_schurs_rebuild) { 4721 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4722 } 4723 PetscFunctionReturn(0); 4724 } 4725