1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) { 32 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented"); 33 } 34 35 if (pcbddc->dbg_flag) { 36 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 37 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 38 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 39 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 40 } 41 42 /* max size of subsets */ 43 mss = 0; 44 for (i=0;i<sub_schurs->n_subs;i++) { 45 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 46 PetscInt subset_size; 47 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 48 mss = PetscMax(mss,subset_size); 49 } 50 } 51 52 /* min/max and threshold */ 53 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 54 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 55 nmax = PetscMax(nmin,nmax); 56 if (pcbddc->adaptive_threshold > 1.0) { 57 thresh = 1.0/pcbddc->adaptive_threshold; 58 } else { 59 thresh = 1.0; 60 } 61 62 /* allocate lapack workspace */ 63 cum = cum2 = 0; 64 maxneigs = 0; 65 for (i=0;i<sub_schurs->n_subs;i++) { 66 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 67 PetscInt n,subset_size; 68 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 69 n = PetscMin(subset_size,nmax); 70 cum += subset_size*n; 71 cum2 += n; 72 maxneigs = PetscMax(maxneigs,n); 73 } 74 } 75 76 if (mss) { 77 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 78 PetscBLASInt B_itype = 1; 79 PetscBLASInt B_N = mss; 80 PetscReal zero = 0.0; 81 PetscReal eps = 0.0; /* dlamch? */ 82 83 B_lwork = -1; 84 S = NULL; 85 St = NULL; 86 eigs = NULL; 87 eigv = NULL; 88 B_iwork = NULL; 89 B_ifail = NULL; 90 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 91 #if defined(PETSC_USE_COMPLEX) 92 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 93 #else 94 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 95 #endif 96 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 97 ierr = PetscFPTrapPop();CHKERRQ(ierr); 98 } else { 99 /* TODO */ 100 } 101 } else { 102 lwork = 0; 103 } 104 105 nv = 0; 106 if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */ 107 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 108 } 109 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 110 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 111 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 112 #if defined(PETSC_USE_COMPLEX) 113 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 114 #endif 115 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 116 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 117 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 118 nv+cum,&pcbddc->adaptive_constraints_idxs, 119 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 120 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 121 122 maxneigs = 0; 123 cum = cum2 = cumarray = 0; 124 if (sub_schurs->is_Ej_com) { 125 const PetscInt *idxs; 126 127 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 128 for (cum=0;cum<nv;cum++) { 129 pcbddc->adaptive_constraints_n[cum] = 1; 130 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 131 pcbddc->adaptive_constraints_ptrs[cum] = cum; 132 pcbddc->adaptive_constraints_data[cum] = 1.0; 133 } 134 cum2 = cum; 135 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 136 } 137 138 if (mss) { /* multilevel */ 139 if (pcbddc->use_deluxe_scaling) { 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 141 } else { 142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 143 } 144 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 145 } 146 147 if (pcbddc->dbg_flag) { 148 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 149 } 150 151 for (i=0;i<sub_schurs->n_subs;i++) { 152 PetscInt j,subset_size; 153 154 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 155 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 156 const PetscInt *idxs; 157 PetscScalar one = 1.0,scalar_zero = 0.0; 158 PetscReal zero=0.0; 159 PetscBLASInt B_N; 160 161 /* S should be copied since we need it for deluxe scaling */ 162 if (sub_schurs->is_hermitian) { 163 PetscInt j; 164 for (j=0;j<subset_size;j++) { 165 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 } else { 171 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 172 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 173 } 174 /* is always this the right matrix? */ 175 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 177 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 178 /* St = Starray+cumarray; */ 179 180 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 181 PetscBLASInt B_itype = 1; 182 PetscBLASInt B_IL = 1, B_IU; 183 PetscReal eps = -1.0; /* dlamch? */ 184 PetscInt nmin_s; 185 186 /* ask for eigenvalues lower than thresh */ 187 if (pcbddc->dbg_flag) { 188 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs); 189 } 190 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 191 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 192 #if defined(PETSC_USE_COMPLEX) 193 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 194 #else 195 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 196 #endif 197 ierr = PetscFPTrapPop();CHKERRQ(ierr); 198 if (B_ierr) { 199 if (B_ierr < 0 ) { 200 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 201 } else if (B_ierr <= B_N) { 202 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 203 } else { 204 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 205 } 206 } 207 208 if (B_neigs > nmax) { 209 if (pcbddc->dbg_flag) { 210 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 211 } 212 B_neigs = nmax; 213 } 214 215 nmin_s = PetscMin(nmin,B_N); 216 if (B_neigs < nmin_s) { 217 PetscBLASInt B_neigs2; 218 219 B_IL = B_neigs + 1; 220 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 221 if (pcbddc->dbg_flag) { 222 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 223 } 224 if (sub_schurs->is_hermitian) { 225 PetscInt j; 226 for (j=0;j<subset_size;j++) { 227 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 228 } 229 for (j=0;j<subset_size;j++) { 230 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 231 } 232 } else { 233 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 234 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 235 } 236 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 237 #if defined(PETSC_USE_COMPLEX) 238 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 239 #else 240 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 241 #endif 242 ierr = PetscFPTrapPop();CHKERRQ(ierr); 243 B_neigs += B_neigs2; 244 } 245 if (B_ierr) { 246 if (B_ierr < 0 ) { 247 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 248 } else if (B_ierr <= B_N) { 249 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 250 } else { 251 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 252 } 253 } 254 if (pcbddc->dbg_flag) { 255 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 256 for (j=0;j<B_neigs;j++) { 257 if (eigs[j] == 0.0) { 258 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 259 } else { 260 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 261 } 262 } 263 } 264 } else { 265 /* TODO */ 266 } 267 maxneigs = PetscMax(B_neigs,maxneigs); 268 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 269 270 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 271 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 272 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 273 274 if (pcbddc->dbg_flag > 1) { 275 PetscInt ii; 276 for (ii=0;ii<B_neigs;ii++) { 277 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 278 for (j=0;j<B_N;j++) { 279 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 280 } 281 } 282 } 283 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 284 for (j=0;j<B_neigs;j++) { 285 #if 0 286 { 287 PetscBLASInt Blas_N,Blas_one = 1.0; 288 PetscScalar norm; 289 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 290 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 291 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 292 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 293 } else { 294 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 295 } 296 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 297 } 298 #endif 299 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 300 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 301 cum2 += subset_size; 302 } 303 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 304 } 305 /* shift for next computation */ 306 cumarray += subset_size*subset_size; 307 } 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 } 311 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 312 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 313 314 if (mss) { 315 if (pcbddc->use_deluxe_scaling) { 316 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 317 } else { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr); 319 } 320 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 321 } 322 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 323 #if defined(PETSC_USE_COMPLEX) 324 ierr = PetscFree(rwork);CHKERRQ(ierr); 325 #endif 326 if (pcbddc->dbg_flag) { 327 PetscInt maxneigs_r; 328 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 329 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 330 } 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "PCBDDCSetUpSolvers" 336 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 337 { 338 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 339 PetscScalar *coarse_submat_vals; 340 PetscErrorCode ierr; 341 342 PetscFunctionBegin; 343 /* Setup local scatters R_to_B and (optionally) R_to_D */ 344 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 345 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 346 347 /* Setup local neumann solver ksp_R */ 348 /* PCBDDCSetUpLocalScatters should be called first! */ 349 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 350 351 /* Change global null space passed in by the user if change of basis has been requested */ 352 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 353 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 354 } 355 356 /* 357 Setup local correction and local part of coarse basis. 358 Gives back the dense local part of the coarse matrix in column major ordering 359 */ 360 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 361 362 /* Compute total number of coarse nodes and setup coarse solver */ 363 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 364 365 /* free */ 366 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "PCBDDCResetCustomization" 372 PetscErrorCode PCBDDCResetCustomization(PC pc) 373 { 374 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 375 PetscErrorCode ierr; 376 377 PetscFunctionBegin; 378 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 379 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 380 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 381 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 383 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 384 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 385 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 387 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 388 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 389 PetscFunctionReturn(0); 390 } 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "PCBDDCResetTopography" 394 PetscErrorCode PCBDDCResetTopography(PC pc) 395 { 396 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 397 PetscErrorCode ierr; 398 399 PetscFunctionBegin; 400 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 401 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 402 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 403 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 404 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "PCBDDCResetSolvers" 410 PetscErrorCode PCBDDCResetSolvers(PC pc) 411 { 412 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 413 PetscScalar *array; 414 PetscErrorCode ierr; 415 416 PetscFunctionBegin; 417 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 418 if (pcbddc->coarse_phi_B) { 419 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 420 ierr = PetscFree(array);CHKERRQ(ierr); 421 } 422 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 423 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 424 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 425 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 426 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 427 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 428 if (pcbddc->local_auxmat2) { 429 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 430 ierr = PetscFree(array);CHKERRQ(ierr); 431 } 432 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 434 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 436 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 437 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 440 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 443 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 444 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 446 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 447 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 454 { 455 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 456 PC_IS *pcis = (PC_IS*)pc->data; 457 VecType impVecType; 458 PetscInt n_constraints,n_R,old_size; 459 PetscErrorCode ierr; 460 461 PetscFunctionBegin; 462 if (!pcbddc->ConstraintMatrix) { 463 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 464 } 465 /* get sizes */ 466 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 467 n_R = pcis->n-pcbddc->n_actual_vertices; 468 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 469 /* local work vectors (try to avoid unneeded work)*/ 470 /* R nodes */ 471 old_size = -1; 472 if (pcbddc->vec1_R) { 473 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 474 } 475 if (n_R != old_size) { 476 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 477 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 478 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 480 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 481 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 482 } 483 /* local primal dofs */ 484 old_size = -1; 485 if (pcbddc->vec1_P) { 486 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 487 } 488 if (pcbddc->local_primal_size != old_size) { 489 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 490 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 491 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 492 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 493 } 494 /* local explicit constraints */ 495 old_size = -1; 496 if (pcbddc->vec1_C) { 497 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 498 } 499 if (n_constraints && n_constraints != old_size) { 500 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 501 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 502 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 503 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 504 } 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "PCBDDCSetUpCorrection" 510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 511 { 512 PetscErrorCode ierr; 513 /* pointers to pcis and pcbddc */ 514 PC_IS* pcis = (PC_IS*)pc->data; 515 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 516 /* submatrices of local problem */ 517 Mat A_RV,A_VR,A_VV; 518 /* submatrices of local coarse problem */ 519 Mat S_VV,S_CV,S_VC,S_CC; 520 /* working matrices */ 521 Mat C_CR; 522 /* additional working stuff */ 523 PC pc_R; 524 Mat F; 525 PetscBool isLU,isCHOL,isILU; 526 527 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 528 PetscScalar *work; 529 PetscInt *idx_V_B; 530 PetscInt n,n_vertices,n_constraints; 531 PetscInt i,n_R,n_D,n_B; 532 PetscBool unsymmetric_check; 533 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 534 MatType impMatType; 535 /* some shortcuts to scalars */ 536 PetscScalar one=1.0,m_one=-1.0; 537 538 PetscFunctionBegin; 539 /* get number of vertices (corners plus constraints with change of basis) 540 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 541 n_vertices = pcbddc->n_actual_vertices; 542 n_constraints = pcbddc->local_primal_size-n_vertices; 543 /* Set Non-overlapping dimensions */ 544 n_B = pcis->n_B; n_D = pcis->n - n_B; 545 n_R = pcis->n-n_vertices; 546 547 /* Set types for local objects needed by BDDC precondtioner */ 548 impMatType = MATSEQDENSE; 549 550 /* vertices in boundary numbering */ 551 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 552 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 553 if (i != n_vertices) { 554 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 555 } 556 557 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 558 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 567 568 unsymmetric_check = PETSC_FALSE; 569 /* allocate workspace */ 570 n = 0; 571 if (n_constraints) { 572 n += n_R*n_constraints; 573 } 574 if (n_vertices) { 575 n = PetscMax(2*n_R*n_vertices,n); 576 } 577 if (!pcbddc->issym) { 578 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 579 unsymmetric_check = PETSC_TRUE; 580 } 581 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 582 583 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 584 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 585 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 586 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 588 if (isLU || isILU || isCHOL) { 589 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 590 } else { 591 F = NULL; 592 } 593 594 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 595 if (n_constraints) { 596 Mat M1,M2,M3; 597 IS is_aux; 598 /* see if we can save some allocations */ 599 if (pcbddc->local_auxmat2) { 600 PetscInt on_R,on_constraints; 601 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 602 if (on_R != n_R || on_constraints != n_constraints) { 603 PetscScalar *marray; 604 605 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 606 ierr = PetscFree(marray);CHKERRQ(ierr); 607 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 608 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 609 } 610 } 611 /* auxiliary matrices */ 612 if (!pcbddc->local_auxmat2) { 613 PetscScalar *marray; 614 615 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 617 marray += n_R*n_constraints; 618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 619 } 620 621 /* Extract constraints on R nodes: C_{CR} */ 622 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 623 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 624 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 625 626 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 627 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 628 for (i=0;i<n_constraints;i++) { 629 const PetscScalar *row_cmat_values; 630 const PetscInt *row_cmat_indices; 631 PetscInt size_of_constraint,j; 632 633 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 634 for (j=0;j<size_of_constraint;j++) { 635 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 636 } 637 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 638 } 639 if (F) { 640 Mat B; 641 642 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 643 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 644 ierr = MatDestroy(&B);CHKERRQ(ierr); 645 } else { 646 PetscScalar *xarray; 647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 648 for (i=0;i<n_constraints;i++) { 649 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 650 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 651 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 652 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 653 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 654 } 655 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 656 } 657 658 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 659 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 660 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 661 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 662 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 663 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 664 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 665 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 666 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 667 ierr = MatDestroy(&M2);CHKERRQ(ierr); 668 ierr = MatDestroy(&M3);CHKERRQ(ierr); 669 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 670 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 671 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 672 ierr = MatDestroy(&M1);CHKERRQ(ierr); 673 } 674 /* Get submatrices from subdomain matrix */ 675 if (n_vertices) { 676 Mat newmat; 677 IS is_aux; 678 PetscInt ibs,mbs; 679 PetscBool issbaij; 680 681 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 682 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 683 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 684 if (ibs != mbs) { /* need to convert to SEQAIJ */ 685 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 686 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 687 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 688 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 689 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 690 } else { 691 /* this is safe */ 692 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 693 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 694 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 695 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 696 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 697 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 698 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 699 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 700 } else { 701 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 702 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 703 } 704 } 705 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 706 } 707 708 /* Matrix of coarse basis functions (local) */ 709 if (pcbddc->coarse_phi_B) { 710 PetscInt on_B,on_primal,on_D=n_D; 711 if (pcbddc->coarse_phi_D) { 712 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 713 } 714 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 715 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 716 PetscScalar *marray; 717 718 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 719 ierr = PetscFree(marray);CHKERRQ(ierr); 720 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 721 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 722 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 723 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 724 } 725 } 726 727 if (!pcbddc->coarse_phi_B) { 728 PetscScalar *marray; 729 730 n = n_B*pcbddc->local_primal_size; 731 if (pcbddc->switch_static || pcbddc->dbg_flag) { 732 n += n_D*pcbddc->local_primal_size; 733 } 734 if (!pcbddc->issym) { 735 n *= 2; 736 } 737 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 738 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 739 n = n_B*pcbddc->local_primal_size; 740 if (pcbddc->switch_static || pcbddc->dbg_flag) { 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 742 n += n_D*pcbddc->local_primal_size; 743 } 744 if (!pcbddc->issym) { 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 746 if (pcbddc->switch_static || pcbddc->dbg_flag) { 747 n = n_B*pcbddc->local_primal_size; 748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 749 } 750 } else { 751 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 752 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 753 if (pcbddc->switch_static || pcbddc->dbg_flag) { 754 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 755 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 756 } 757 } 758 } 759 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 760 /* vertices */ 761 if (n_vertices) { 762 763 if (n_R) { 764 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 765 PetscBLASInt B_N,B_one = 1; 766 PetscScalar *x,*y; 767 768 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 769 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 770 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 771 if (F) { 772 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 773 } else { 774 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 775 for (i=0;i<n_vertices;i++) { 776 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 777 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 778 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 779 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 780 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 781 } 782 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 783 } 784 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 785 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 786 if (n_constraints) { 787 Mat B; 788 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 789 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 790 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 791 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 792 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 793 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 794 ierr = MatDestroy(&B);CHKERRQ(ierr); 795 } 796 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 797 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 798 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 799 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 800 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 801 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 802 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 803 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 804 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 805 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 806 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 807 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 808 } else { 809 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 810 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 811 } 812 /* coarse basis functions */ 813 for (i=0;i<n_vertices;i++) { 814 PetscScalar *y; 815 816 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 817 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 818 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 819 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 820 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 y[n_B*i+idx_V_B[i]] = 1.0; 822 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 823 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 824 825 if (pcbddc->switch_static || pcbddc->dbg_flag) { 826 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 827 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 828 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 830 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 831 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 832 } 833 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 834 } 835 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 836 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 837 } 838 839 if (n_constraints) { 840 Mat B; 841 842 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 843 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 844 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 845 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 846 if (n_vertices) { 847 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 848 } 849 ierr = MatDestroy(&B);CHKERRQ(ierr); 850 /* coarse basis functions */ 851 for (i=0;i<n_constraints;i++) { 852 PetscScalar *y; 853 854 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 855 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 856 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 857 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 860 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 861 862 if (pcbddc->switch_static || pcbddc->dbg_flag) { 863 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 864 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 865 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 868 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 869 } 870 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 871 } 872 } 873 874 /* compute other basis functions for non-symmetric problems */ 875 if (!pcbddc->issym) { 876 Mat B,X; 877 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 879 880 if (n_constraints) { 881 Mat S_CCT,B_C; 882 883 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 884 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 885 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 886 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 887 if (n_vertices) { 888 Mat B_V,S_VCT; 889 890 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 891 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 892 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 893 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 894 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 895 } 896 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 897 } 898 if (n_vertices && n_R) { 899 Mat A_VRT; 900 PetscBLASInt B_N,B_one = 1; 901 902 if (!n_constraints) { /* if there are no constraints, reset work */ 903 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 904 } 905 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 906 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 907 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 908 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 909 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 910 } 911 912 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 913 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 914 for (i=0;i<pcbddc->local_primal_size;i++) { 915 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 916 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 917 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 920 } 921 } else { 922 for (i=0;i<pcbddc->local_primal_size;i++) { 923 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 924 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 925 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 926 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 927 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 928 } 929 } 930 ierr = MatDestroy(&B);CHKERRQ(ierr); 931 /* coarse basis functions */ 932 for (i=0;i<pcbddc->local_primal_size;i++) { 933 PetscScalar *y; 934 935 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 936 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 937 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 938 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 939 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 if (i<n_vertices) { 941 y[n_B*i+idx_V_B[i]] = 1.0; 942 } 943 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 944 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 945 946 if (pcbddc->switch_static || pcbddc->dbg_flag) { 947 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 948 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 949 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 952 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 953 } 954 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 955 } 956 ierr = MatDestroy(&X);CHKERRQ(ierr); 957 } 958 959 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 960 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 961 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 962 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 963 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 964 /* Checking coarse_sub_mat and coarse basis functios */ 965 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 966 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 967 if (pcbddc->dbg_flag) { 968 Mat coarse_sub_mat; 969 Mat AUXMAT,TM1,TM2,TM3,TM4; 970 Mat coarse_phi_D,coarse_phi_B; 971 Mat coarse_psi_D,coarse_psi_B; 972 Mat A_II,A_BB,A_IB,A_BI; 973 MatType checkmattype=MATSEQAIJ; 974 PetscReal real_value; 975 976 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 977 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 979 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 980 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 981 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 982 if (unsymmetric_check) { 983 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 984 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 985 } 986 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 987 988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 990 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 991 if (unsymmetric_check) { 992 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 993 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 994 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 995 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 996 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 997 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 998 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 999 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1000 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1001 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1002 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1003 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1004 } else { 1005 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1006 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1007 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1008 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1009 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1010 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1011 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1012 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1013 } 1014 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1015 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1017 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1018 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1019 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 1020 1021 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1022 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1023 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1024 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1025 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1026 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1027 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1028 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1029 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1030 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1031 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1032 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1033 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1034 if (unsymmetric_check) { 1035 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1036 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1037 } 1038 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1039 } 1040 1041 /* free memory */ 1042 ierr = PetscFree(work);CHKERRQ(ierr); 1043 if (n_vertices) { 1044 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1045 } 1046 if (n_constraints) { 1047 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1048 } 1049 /* get back data */ 1050 *coarse_submat_vals_n = coarse_submat_vals; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1056 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1057 { 1058 Mat *work_mat; 1059 IS isrow_s,iscol_s; 1060 PetscBool rsorted,csorted; 1061 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1066 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1067 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1068 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1069 1070 if (!rsorted) { 1071 const PetscInt *idxs; 1072 PetscInt *idxs_sorted,i; 1073 1074 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1076 for (i=0;i<rsize;i++) { 1077 idxs_perm_r[i] = i; 1078 } 1079 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1080 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1081 for (i=0;i<rsize;i++) { 1082 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1083 } 1084 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1085 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1086 } else { 1087 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1088 isrow_s = isrow; 1089 } 1090 1091 if (!csorted) { 1092 if (isrow == iscol) { 1093 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1094 iscol_s = isrow_s; 1095 } else { 1096 const PetscInt *idxs; 1097 PetscInt *idxs_sorted,i; 1098 1099 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1100 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1101 for (i=0;i<csize;i++) { 1102 idxs_perm_c[i] = i; 1103 } 1104 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1105 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1106 for (i=0;i<csize;i++) { 1107 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1108 } 1109 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1110 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1111 } 1112 } else { 1113 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1114 iscol_s = iscol; 1115 } 1116 1117 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1118 1119 if (!rsorted || !csorted) { 1120 Mat new_mat; 1121 IS is_perm_r,is_perm_c; 1122 1123 if (!rsorted) { 1124 PetscInt *idxs_r,i; 1125 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1126 for (i=0;i<rsize;i++) { 1127 idxs_r[idxs_perm_r[i]] = i; 1128 } 1129 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1131 } else { 1132 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1133 } 1134 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1135 1136 if (!csorted) { 1137 if (isrow_s == iscol_s) { 1138 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1139 is_perm_c = is_perm_r; 1140 } else { 1141 PetscInt *idxs_c,i; 1142 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1143 for (i=0;i<csize;i++) { 1144 idxs_c[idxs_perm_c[i]] = i; 1145 } 1146 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1147 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1148 } 1149 } else { 1150 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1151 } 1152 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1153 1154 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1155 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1156 work_mat[0] = new_mat; 1157 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1158 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1159 } 1160 1161 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1162 *B = work_mat[0]; 1163 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1164 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1165 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1171 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1172 { 1173 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1175 Mat new_mat; 1176 IS is_local,is_global; 1177 PetscInt local_size; 1178 PetscBool isseqaij; 1179 PetscErrorCode ierr; 1180 1181 PetscFunctionBegin; 1182 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1183 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1184 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1185 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1186 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1187 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1188 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1189 1190 /* check */ 1191 if (pcbddc->dbg_flag) { 1192 Vec x,x_change; 1193 PetscReal error; 1194 1195 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1196 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1197 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1198 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1201 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1202 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1203 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1204 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1205 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1206 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1207 ierr = VecDestroy(&x);CHKERRQ(ierr); 1208 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1209 } 1210 1211 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1212 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1213 if (isseqaij) { 1214 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1215 } else { 1216 Mat work_mat; 1217 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1218 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1219 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1220 } 1221 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1222 /* 1223 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1224 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1225 */ 1226 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1232 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1233 { 1234 PC_IS* pcis = (PC_IS*)(pc->data); 1235 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1236 IS is_aux1,is_aux2; 1237 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1238 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1239 PetscInt vbs,bs; 1240 PetscBT bitmask; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 /* 1245 No need to setup local scatters if 1246 - primal space is unchanged 1247 AND 1248 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1249 AND 1250 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1251 */ 1252 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1253 PetscFunctionReturn(0); 1254 } 1255 /* destroy old objects */ 1256 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1257 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1258 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1259 /* Set Non-overlapping dimensions */ 1260 n_B = pcis->n_B; n_D = pcis->n - n_B; 1261 n_vertices = pcbddc->n_actual_vertices; 1262 /* create auxiliary bitmask */ 1263 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1264 for (i=0;i<n_vertices;i++) { 1265 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1266 } 1267 1268 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1269 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1270 for (i=0, n_R=0; i<pcis->n; i++) { 1271 if (!PetscBTLookup(bitmask,i)) { 1272 idx_R_local[n_R] = i; 1273 n_R++; 1274 } 1275 } 1276 1277 /* Block code */ 1278 vbs = 1; 1279 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1280 if (bs>1 && !(n_vertices%bs)) { 1281 PetscBool is_blocked = PETSC_TRUE; 1282 PetscInt *vary; 1283 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1284 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1285 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1286 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1287 for (i=0; i<n_vertices/bs; i++) { 1288 if (vary[i]!=0 && vary[i]!=bs) { 1289 is_blocked = PETSC_FALSE; 1290 break; 1291 } 1292 } 1293 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1294 vbs = bs; 1295 for (i=0;i<n_R/vbs;i++) { 1296 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1297 } 1298 } 1299 ierr = PetscFree(vary);CHKERRQ(ierr); 1300 } 1301 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1302 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1303 1304 /* print some info if requested */ 1305 if (pcbddc->dbg_flag) { 1306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1307 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1308 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1309 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1310 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1311 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1312 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1313 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1314 } 1315 1316 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1317 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1318 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1319 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1320 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1321 for (i=0; i<n_D; i++) { 1322 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1323 } 1324 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1325 for (i=0, j=0; i<n_R; i++) { 1326 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1327 aux_array1[j++] = i; 1328 } 1329 } 1330 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1331 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1332 for (i=0, j=0; i<n_B; i++) { 1333 if (!PetscBTLookup(bitmask,is_indices[i])) { 1334 aux_array2[j++] = i; 1335 } 1336 } 1337 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1338 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1339 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1340 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1341 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1342 1343 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1344 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1345 for (i=0, j=0; i<n_R; i++) { 1346 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1347 aux_array1[j++] = i; 1348 } 1349 } 1350 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1351 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1352 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1353 } 1354 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1355 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1362 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1363 { 1364 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1365 PC_IS *pcis = (PC_IS*)pc->data; 1366 PC pc_temp; 1367 Mat A_RR; 1368 MatReuse reuse; 1369 PetscScalar m_one = -1.0; 1370 PetscReal value; 1371 PetscInt n_D,n_R,ibs,mbs; 1372 PetscBool use_exact,use_exact_reduced,issbaij; 1373 PetscErrorCode ierr; 1374 /* prefixes stuff */ 1375 char dir_prefix[256],neu_prefix[256],str_level[16]; 1376 size_t len; 1377 1378 PetscFunctionBegin; 1379 1380 /* compute prefixes */ 1381 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1382 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1383 if (!pcbddc->current_level) { 1384 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1385 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1386 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1387 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1388 } else { 1389 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1390 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1391 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1392 len -= 15; /* remove "pc_bddc_coarse_" */ 1393 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1394 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1395 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1396 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1397 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1398 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1399 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1400 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1401 } 1402 1403 /* DIRICHLET PROBLEM */ 1404 if (dirichlet) { 1405 if (pcbddc->issym) { 1406 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1407 } 1408 /* Matrix for Dirichlet problem is pcis->A_II */ 1409 n_D = pcis->n - pcis->n_B; 1410 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1411 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1412 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1413 /* default */ 1414 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1415 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1416 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1417 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1418 if (issbaij) { 1419 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1420 } else { 1421 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1422 } 1423 /* Allow user's customization */ 1424 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1425 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1428 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1429 if (!n_D) { 1430 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1431 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1432 } 1433 /* Set Up KSP for Dirichlet problem of BDDC */ 1434 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1435 /* set ksp_D into pcis data */ 1436 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1437 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1438 pcis->ksp_D = pcbddc->ksp_D; 1439 } 1440 1441 /* NEUMANN PROBLEM */ 1442 A_RR = 0; 1443 if (neumann) { 1444 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1445 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1446 if (pcbddc->ksp_R) { /* already created ksp */ 1447 PetscInt nn_R; 1448 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1449 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1450 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1451 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1452 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1453 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1454 reuse = MAT_INITIAL_MATRIX; 1455 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1456 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1457 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1458 reuse = MAT_INITIAL_MATRIX; 1459 } else { /* safe to reuse the matrix */ 1460 reuse = MAT_REUSE_MATRIX; 1461 } 1462 } 1463 /* last check */ 1464 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1465 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1466 reuse = MAT_INITIAL_MATRIX; 1467 } 1468 } else { /* first time, so we need to create the matrix */ 1469 reuse = MAT_INITIAL_MATRIX; 1470 } 1471 /* extract A_RR */ 1472 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1473 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1474 if (ibs != mbs) { 1475 Mat newmat; 1476 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1477 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1478 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1479 } else { 1480 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1481 } 1482 if (pcbddc->issym) { 1483 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1484 } 1485 if (!pcbddc->ksp_R) { /* create object if not present */ 1486 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1487 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1488 /* default */ 1489 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1490 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1491 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1492 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1493 if (issbaij) { 1494 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1495 } else { 1496 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1497 } 1498 /* Allow user's customization */ 1499 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1500 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1501 } 1502 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1503 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1504 if (!n_R) { 1505 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1506 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1507 } 1508 /* Set Up KSP for Neumann problem of BDDC */ 1509 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1510 } 1511 1512 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1513 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1514 if (pcbddc->dbg_flag) { 1515 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1516 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1518 } 1519 if (dirichlet) { /* Dirichlet */ 1520 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1521 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1522 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1523 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1524 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1525 /* need to be adapted? */ 1526 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1527 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1528 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1529 /* print info */ 1530 if (pcbddc->dbg_flag) { 1531 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1532 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1533 } 1534 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1535 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1536 } 1537 } 1538 if (neumann) { /* Neumann */ 1539 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1540 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1541 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1542 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1543 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1544 /* need to be adapted? */ 1545 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1546 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1547 /* print info */ 1548 if (pcbddc->dbg_flag) { 1549 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1550 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1551 } 1552 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1553 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1554 } 1555 } 1556 } 1557 /* free Neumann problem's matrix */ 1558 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1564 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1565 { 1566 PetscErrorCode ierr; 1567 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1568 1569 PetscFunctionBegin; 1570 if (applytranspose) { 1571 if (pcbddc->local_auxmat1) { 1572 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1573 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1574 } 1575 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1576 } else { 1577 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1578 if (pcbddc->local_auxmat1) { 1579 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1580 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1581 } 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1589 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1590 { 1591 PetscErrorCode ierr; 1592 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1593 PC_IS* pcis = (PC_IS*) (pc->data); 1594 const PetscScalar zero = 0.0; 1595 1596 PetscFunctionBegin; 1597 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1598 if (applytranspose) { 1599 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1600 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1601 } else { 1602 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1603 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1604 } 1605 /* start communications from local primal nodes to rhs of coarse solver */ 1606 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1607 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 1610 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1611 /* TODO remove null space when doing multilevel */ 1612 if (pcbddc->coarse_ksp) { 1613 Vec rhs,sol; 1614 1615 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1616 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1617 if (applytranspose) { 1618 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1619 } else { 1620 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1621 } 1622 } 1623 1624 /* Local solution on R nodes */ 1625 if (pcis->n) { 1626 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1627 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1628 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1629 if (pcbddc->switch_static) { 1630 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1631 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1632 } 1633 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1634 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1635 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 if (pcbddc->switch_static) { 1638 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1639 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 } 1641 } 1642 1643 /* communications from coarse sol to local primal nodes */ 1644 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1645 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1646 1647 /* Sum contributions from two levels */ 1648 if (applytranspose) { 1649 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1650 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1651 } else { 1652 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1653 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1660 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1661 { 1662 PetscErrorCode ierr; 1663 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1664 PetscScalar *array; 1665 Vec from,to; 1666 1667 PetscFunctionBegin; 1668 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1669 from = pcbddc->coarse_vec; 1670 to = pcbddc->vec1_P; 1671 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1672 Vec tvec; 1673 1674 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1675 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1676 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1677 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1678 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1679 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1680 } 1681 } else { /* from local to global -> put data in coarse right hand side */ 1682 from = pcbddc->vec1_P; 1683 to = pcbddc->coarse_vec; 1684 } 1685 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1691 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1692 { 1693 PetscErrorCode ierr; 1694 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1695 PetscScalar *array; 1696 Vec from,to; 1697 1698 PetscFunctionBegin; 1699 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1700 from = pcbddc->coarse_vec; 1701 to = pcbddc->vec1_P; 1702 } else { /* from local to global -> put data in coarse right hand side */ 1703 from = pcbddc->vec1_P; 1704 to = pcbddc->coarse_vec; 1705 } 1706 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1707 if (smode == SCATTER_FORWARD) { 1708 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1709 Vec tvec; 1710 1711 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1712 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1713 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1714 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1715 } 1716 } else { 1717 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1718 ierr = VecResetArray(from);CHKERRQ(ierr); 1719 } 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 /* uncomment for testing purposes */ 1725 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1726 #undef __FUNCT__ 1727 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1728 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1729 { 1730 PetscErrorCode ierr; 1731 PC_IS* pcis = (PC_IS*)(pc->data); 1732 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1733 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1734 /* one and zero */ 1735 PetscScalar one=1.0,zero=0.0; 1736 /* space to store constraints and their local indices */ 1737 PetscScalar *temp_quadrature_constraint; 1738 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1739 /* iterators */ 1740 PetscInt i,j,k,total_counts,temp_start_ptr; 1741 /* BLAS integers */ 1742 PetscBLASInt lwork,lierr; 1743 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1744 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1745 /* reuse */ 1746 PetscInt olocal_primal_size; 1747 PetscInt *oprimal_indices_local_idxs; 1748 /* change of basis */ 1749 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1750 PetscBool boolforchange,qr_needed; 1751 PetscBT touched,change_basis,qr_needed_idx; 1752 /* auxiliary stuff */ 1753 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1754 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1755 PetscScalar *temp_quadrature_constraint_work=NULL; 1756 /* some quantities */ 1757 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1758 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1759 1760 PetscFunctionBegin; 1761 /* Destroy Mat objects computed previously */ 1762 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1763 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1764 1765 /* print some info */ 1766 if (pcbddc->dbg_flag) { 1767 IS vertices; 1768 PetscInt nv,nedges,nfaces; 1769 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1770 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1771 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1772 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1773 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1774 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1775 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1776 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1777 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1778 } 1779 1780 if (!pcbddc->adaptive_selection) { 1781 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1782 MatNullSpace nearnullsp; 1783 const Vec *nearnullvecs; 1784 Vec *localnearnullsp; 1785 PetscScalar *array; 1786 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1787 PetscBool nnsp_has_cnst; 1788 /* LAPACK working arrays for SVD or POD */ 1789 PetscBool skip_lapack; 1790 PetscScalar *work; 1791 PetscReal *singular_vals; 1792 #if defined(PETSC_USE_COMPLEX) 1793 PetscReal *rwork; 1794 #endif 1795 #if defined(PETSC_MISSING_LAPACK_GESVD) 1796 PetscScalar *temp_basis,*correlation_mat; 1797 #else 1798 PetscBLASInt dummy_int=1; 1799 PetscScalar dummy_scalar=1.; 1800 #endif 1801 1802 /* Get index sets for faces, edges and vertices from graph */ 1803 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1804 /* free unneeded index sets */ 1805 if (!pcbddc->use_vertices) { 1806 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1807 } 1808 if (!pcbddc->use_edges) { 1809 for (i=0;i<n_ISForEdges;i++) { 1810 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1811 } 1812 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1813 n_ISForEdges = 0; 1814 } 1815 if (!pcbddc->use_faces) { 1816 for (i=0;i<n_ISForFaces;i++) { 1817 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1818 } 1819 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1820 n_ISForFaces = 0; 1821 } 1822 /* HACKS (the following two blocks of code) */ 1823 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1824 pcbddc->use_change_of_basis = PETSC_TRUE; 1825 if (!ISForEdges) { 1826 pcbddc->use_change_on_faces = PETSC_TRUE; 1827 } 1828 } 1829 if (pcbddc->NullSpace) { 1830 /* use_change_of_basis should be consistent among processors */ 1831 PetscBool tbool[2],gbool[2]; 1832 tbool [0] = pcbddc->use_change_of_basis; 1833 tbool [1] = pcbddc->use_change_on_faces; 1834 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1835 pcbddc->use_change_of_basis = gbool[0]; 1836 pcbddc->use_change_on_faces = gbool[1]; 1837 } 1838 1839 /* check if near null space is attached to global mat */ 1840 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1841 if (nearnullsp) { 1842 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1843 /* remove any stored info */ 1844 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1845 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1846 /* store information for BDDC solver reuse */ 1847 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1848 pcbddc->onearnullspace = nearnullsp; 1849 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1850 for (i=0;i<nnsp_size;i++) { 1851 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1852 } 1853 } else { /* if near null space is not provided BDDC uses constants by default */ 1854 nnsp_size = 0; 1855 nnsp_has_cnst = PETSC_TRUE; 1856 } 1857 /* get max number of constraints on a single cc */ 1858 max_constraints = nnsp_size; 1859 if (nnsp_has_cnst) max_constraints++; 1860 1861 /* 1862 Evaluate maximum storage size needed by the procedure 1863 - temp_indices will contain start index of each constraint stored as follows 1864 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1865 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1866 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1867 */ 1868 total_counts = n_ISForFaces+n_ISForEdges; 1869 total_counts *= max_constraints; 1870 n_vertices = 0; 1871 if (ISForVertices) { 1872 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1873 } 1874 total_counts += n_vertices; 1875 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1876 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1877 total_counts = 0; 1878 max_size_of_constraint = 0; 1879 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1880 if (i<n_ISForEdges) { 1881 used_IS = &ISForEdges[i]; 1882 } else { 1883 used_IS = &ISForFaces[i-n_ISForEdges]; 1884 } 1885 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1886 total_counts += j; 1887 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1888 } 1889 total_counts *= max_constraints; 1890 total_counts += n_vertices; 1891 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1892 /* get local part of global near null space vectors */ 1893 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1894 for (k=0;k<nnsp_size;k++) { 1895 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1896 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1897 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1898 } 1899 1900 /* whether or not to skip lapack calls */ 1901 skip_lapack = PETSC_TRUE; 1902 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1903 1904 /* allocate some auxiliary stuff */ 1905 if (!skip_lapack || pcbddc->use_qr_single) { 1906 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1907 } else { 1908 gidxs = NULL; 1909 permutation = NULL; 1910 temp_indices_to_constraint_work = NULL; 1911 temp_quadrature_constraint_work = NULL; 1912 } 1913 1914 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1915 if (!skip_lapack) { 1916 PetscScalar temp_work; 1917 1918 #if defined(PETSC_MISSING_LAPACK_GESVD) 1919 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1920 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1921 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1922 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1923 #if defined(PETSC_USE_COMPLEX) 1924 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1925 #endif 1926 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1927 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1928 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1929 lwork = -1; 1930 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1931 #if !defined(PETSC_USE_COMPLEX) 1932 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1933 #else 1934 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1935 #endif 1936 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1937 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1938 #else /* on missing GESVD */ 1939 /* SVD */ 1940 PetscInt max_n,min_n; 1941 max_n = max_size_of_constraint; 1942 min_n = max_constraints; 1943 if (max_size_of_constraint < max_constraints) { 1944 min_n = max_size_of_constraint; 1945 max_n = max_constraints; 1946 } 1947 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1948 #if defined(PETSC_USE_COMPLEX) 1949 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1950 #endif 1951 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1952 lwork = -1; 1953 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1954 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1955 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1956 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1957 #if !defined(PETSC_USE_COMPLEX) 1958 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1959 #else 1960 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1961 #endif 1962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1963 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1964 #endif /* on missing GESVD */ 1965 /* Allocate optimal workspace */ 1966 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1967 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1968 } 1969 /* Now we can loop on constraining sets */ 1970 total_counts = 0; 1971 temp_indices[0] = 0; 1972 /* vertices */ 1973 if (ISForVertices) { 1974 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1975 if (nnsp_has_cnst) { /* consider all vertices */ 1976 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1977 for (i=0;i<n_vertices;i++) { 1978 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1979 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1980 total_counts++; 1981 } 1982 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1983 PetscBool used_vertex; 1984 for (i=0;i<n_vertices;i++) { 1985 used_vertex = PETSC_FALSE; 1986 k = 0; 1987 while (!used_vertex && k<nnsp_size) { 1988 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1989 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1990 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1991 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1992 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1993 total_counts++; 1994 used_vertex = PETSC_TRUE; 1995 } 1996 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1997 k++; 1998 } 1999 } 2000 } 2001 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2002 n_vertices = total_counts; 2003 } 2004 2005 /* edges and faces */ 2006 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2007 if (ncc<n_ISForEdges) { 2008 used_IS = &ISForEdges[ncc]; 2009 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2010 } else { 2011 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2012 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2013 } 2014 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2015 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2016 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2017 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2018 /* change of basis should not be performed on local periodic nodes */ 2019 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2020 if (nnsp_has_cnst) { 2021 PetscScalar quad_value; 2022 temp_constraints++; 2023 if (!pcbddc->use_nnsp_true) { 2024 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2025 } else { 2026 quad_value = 1.0; 2027 } 2028 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2029 for (j=0;j<size_of_constraint;j++) { 2030 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2031 } 2032 /* sort by global ordering if using lapack subroutines (not needed!) */ 2033 if (!skip_lapack || pcbddc->use_qr_single) { 2034 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2035 for (j=0;j<size_of_constraint;j++) { 2036 permutation[j]=j; 2037 } 2038 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2039 for (j=0;j<size_of_constraint;j++) { 2040 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2041 } 2042 for (j=0;j<size_of_constraint;j++) { 2043 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2044 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2045 } 2046 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2047 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2048 } 2049 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2050 total_counts++; 2051 } 2052 for (k=0;k<nnsp_size;k++) { 2053 PetscReal real_value; 2054 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2055 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2056 for (j=0;j<size_of_constraint;j++) { 2057 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2058 } 2059 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2060 /* check if array is null on the connected component */ 2061 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2062 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2063 if (real_value > 0.0) { /* keep indices and values */ 2064 /* sort by global ordering if using lapack subroutines */ 2065 if (!skip_lapack || pcbddc->use_qr_single) { 2066 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2067 for (j=0;j<size_of_constraint;j++) { 2068 permutation[j]=j; 2069 } 2070 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2071 for (j=0;j<size_of_constraint;j++) { 2072 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2073 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2074 } 2075 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2076 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2077 } 2078 temp_constraints++; 2079 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2080 total_counts++; 2081 } 2082 } 2083 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2084 valid_constraints = temp_constraints; 2085 if (!pcbddc->use_nnsp_true && temp_constraints) { 2086 if (temp_constraints == 1) { /* just normalize the constraint */ 2087 PetscScalar norm; 2088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2089 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2090 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2091 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2092 } else { /* perform SVD */ 2093 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2094 2095 #if defined(PETSC_MISSING_LAPACK_GESVD) 2096 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2097 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2098 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2099 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2100 from that computed using LAPACKgesvd 2101 -> This is due to a different computation of eigenvectors in LAPACKheev 2102 -> The quality of the POD-computed basis will be the same */ 2103 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2104 /* Store upper triangular part of correlation matrix */ 2105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2106 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2107 for (j=0;j<temp_constraints;j++) { 2108 for (k=0;k<j+1;k++) { 2109 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2110 } 2111 } 2112 /* compute eigenvalues and eigenvectors of correlation matrix */ 2113 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2114 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2115 #if !defined(PETSC_USE_COMPLEX) 2116 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2117 #else 2118 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2119 #endif 2120 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2121 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2122 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2123 j = 0; 2124 while (j < temp_constraints && singular_vals[j] < tol) j++; 2125 total_counts = total_counts-j; 2126 valid_constraints = temp_constraints-j; 2127 /* scale and copy POD basis into used quadrature memory */ 2128 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2129 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2130 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2131 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2132 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2133 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2134 if (j<temp_constraints) { 2135 PetscInt ii; 2136 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2137 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2138 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2139 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2140 for (k=0;k<temp_constraints-j;k++) { 2141 for (ii=0;ii<size_of_constraint;ii++) { 2142 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2143 } 2144 } 2145 } 2146 #else /* on missing GESVD */ 2147 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2150 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2151 #if !defined(PETSC_USE_COMPLEX) 2152 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2153 #else 2154 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2155 #endif 2156 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2157 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2158 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2159 k = temp_constraints; 2160 if (k > size_of_constraint) k = size_of_constraint; 2161 j = 0; 2162 while (j < k && singular_vals[k-j-1] < tol) j++; 2163 valid_constraints = k-j; 2164 total_counts = total_counts-temp_constraints+valid_constraints; 2165 #endif /* on missing GESVD */ 2166 } 2167 } 2168 /* setting change_of_basis flag is safe now */ 2169 if (boolforchange) { 2170 for (j=0;j<valid_constraints;j++) { 2171 PetscBTSet(change_basis,total_counts-j-1); 2172 } 2173 } 2174 } 2175 /* free workspace */ 2176 if (!skip_lapack || pcbddc->use_qr_single) { 2177 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2178 } 2179 if (!skip_lapack) { 2180 ierr = PetscFree(work);CHKERRQ(ierr); 2181 #if defined(PETSC_USE_COMPLEX) 2182 ierr = PetscFree(rwork);CHKERRQ(ierr); 2183 #endif 2184 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2185 #if defined(PETSC_MISSING_LAPACK_GESVD) 2186 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2187 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2188 #endif 2189 } 2190 for (k=0;k<nnsp_size;k++) { 2191 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2192 } 2193 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2194 /* free index sets of faces, edges and vertices */ 2195 for (i=0;i<n_ISForFaces;i++) { 2196 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2197 } 2198 if (n_ISForFaces) { 2199 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2200 } 2201 for (i=0;i<n_ISForEdges;i++) { 2202 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2203 } 2204 if (n_ISForEdges) { 2205 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2206 } 2207 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2208 } else { 2209 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2210 PetscInt cum = 0; 2211 2212 total_counts = 0; 2213 n_vertices = 0; 2214 if (sub_schurs->is_Ej_com) { 2215 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2216 } 2217 max_constraints = 0; 2218 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2219 total_counts += pcbddc->adaptive_constraints_n[i]; 2220 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2221 } 2222 temp_indices = pcbddc->adaptive_constraints_ptrs; 2223 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2224 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2225 2226 #if 0 2227 printf("Found %d totals\n",total_counts); 2228 for (i=0;i<total_counts;i++) { 2229 printf("const %d, start %d",i,temp_indices[i]); 2230 printf(" end %d:\n",temp_indices[i+1]); 2231 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2232 printf(" idxs %d",temp_indices_to_constraint[j]); 2233 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2234 } 2235 } 2236 for (i=0;i<n_vertices;i++) { 2237 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]); 2238 } 2239 for (i=0;i<sub_schurs->n_subs;i++) { 2240 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2241 } 2242 #endif 2243 2244 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2245 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2246 /* Change of basis */ 2247 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2248 if (pcbddc->use_change_of_basis) { 2249 cum = n_vertices; 2250 for (i=0;i<sub_schurs->n_subs;i++) { 2251 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2252 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2253 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2254 } 2255 } 2256 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2257 } 2258 } 2259 } 2260 2261 /* map temp_indices_to_constraint in boundary numbering */ 2262 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2263 if (i != temp_indices[total_counts]) { 2264 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2265 } 2266 2267 /* set quantities in pcbddc data structure and store previous primal size */ 2268 /* n_vertices defines the number of subdomain corners in the primal space */ 2269 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2270 olocal_primal_size = pcbddc->local_primal_size; 2271 pcbddc->local_primal_size = total_counts; 2272 pcbddc->n_vertices = n_vertices; 2273 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2274 2275 /* Create constraint matrix */ 2276 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2277 /* so we need to set it up properly either with or without change of basis */ 2278 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2279 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2280 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2281 /* array to compute a local numbering of constraints : vertices first then constraints */ 2282 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2283 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2284 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2285 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2286 /* auxiliary stuff for basis change */ 2287 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2288 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2289 2290 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2291 total_primal_vertices=0; 2292 for (i=0;i<pcbddc->local_primal_size;i++) { 2293 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2294 if (size_of_constraint == 1) { 2295 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2296 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2297 aux_primal_minloc[total_primal_vertices]=0; 2298 total_primal_vertices++; 2299 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2300 PetscInt min_loc,min_index; 2301 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2302 /* find first untouched local node */ 2303 k = 0; 2304 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2305 min_index = global_indices[k]; 2306 min_loc = k; 2307 /* search the minimum among global nodes already untouched on the cc */ 2308 for (k=1;k<size_of_constraint;k++) { 2309 /* there can be more than one constraint on a single connected component */ 2310 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2311 min_index = global_indices[k]; 2312 min_loc = k; 2313 } 2314 } 2315 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2316 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2317 aux_primal_minloc[total_primal_vertices]=min_loc; 2318 total_primal_vertices++; 2319 } 2320 } 2321 /* determine if a QR strategy is needed for change of basis */ 2322 qr_needed = PETSC_FALSE; 2323 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2324 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2325 if (PetscBTLookup(change_basis,i)) { 2326 if (!pcbddc->use_qr_single) { 2327 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2328 j = 0; 2329 for (k=0;k<size_of_constraint;k++) { 2330 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2331 j++; 2332 } 2333 } 2334 /* found more than one primal dof on the cc */ 2335 if (j > 1) { 2336 PetscBTSet(qr_needed_idx,i); 2337 qr_needed = PETSC_TRUE; 2338 } 2339 } else { 2340 PetscBTSet(qr_needed_idx,i); 2341 qr_needed = PETSC_TRUE; 2342 } 2343 } 2344 } 2345 /* free workspace */ 2346 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2347 2348 /* permute indices in order to have a sorted set of vertices */ 2349 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2350 2351 /* nonzero structure of constraint matrix */ 2352 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2353 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2354 j=total_primal_vertices; 2355 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2356 if (!PetscBTLookup(change_basis,i)) { 2357 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2358 j++; 2359 } 2360 } 2361 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2362 ierr = PetscFree(nnz);CHKERRQ(ierr); 2363 /* set values in constraint matrix */ 2364 for (i=0;i<total_primal_vertices;i++) { 2365 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2366 } 2367 total_counts = total_primal_vertices; 2368 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2369 if (!PetscBTLookup(change_basis,i)) { 2370 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2371 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2372 total_counts++; 2373 } 2374 } 2375 /* assembling */ 2376 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2377 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2378 /* 2379 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2380 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2381 */ 2382 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2383 if (pcbddc->use_change_of_basis) { 2384 /* dual and primal dofs on a single cc */ 2385 PetscInt dual_dofs,primal_dofs; 2386 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2387 PetscInt primal_counter; 2388 /* working stuff for GEQRF */ 2389 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2390 PetscBLASInt lqr_work; 2391 /* working stuff for UNGQR */ 2392 PetscScalar *gqr_work,lgqr_work_t; 2393 PetscBLASInt lgqr_work; 2394 /* working stuff for TRTRS */ 2395 PetscScalar *trs_rhs; 2396 PetscBLASInt Blas_NRHS; 2397 /* pointers for values insertion into change of basis matrix */ 2398 PetscInt *start_rows,*start_cols; 2399 PetscScalar *start_vals; 2400 /* working stuff for values insertion */ 2401 PetscBT is_primal; 2402 /* matrix sizes */ 2403 PetscInt global_size,local_size; 2404 /* temporary change of basis */ 2405 Mat localChangeOfBasisMatrix; 2406 /* extra space for debugging */ 2407 PetscScalar *dbg_work; 2408 2409 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2410 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2411 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2412 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2413 /* nonzeros for local mat */ 2414 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2415 for (i=0;i<pcis->n;i++) nnz[i]=1; 2416 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2417 if (PetscBTLookup(change_basis,i)) { 2418 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2419 if (PetscBTLookup(qr_needed_idx,i)) { 2420 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2421 } else { 2422 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2423 /* get local primal index on the cc */ 2424 j = 0; 2425 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2426 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2427 } 2428 } 2429 } 2430 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2431 ierr = PetscFree(nnz);CHKERRQ(ierr); 2432 /* Set initial identity in the matrix */ 2433 for (i=0;i<pcis->n;i++) { 2434 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2435 } 2436 2437 if (pcbddc->dbg_flag) { 2438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2439 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2440 } 2441 2442 2443 /* Now we loop on the constraints which need a change of basis */ 2444 /* 2445 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2446 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2447 2448 Basic blocks of change of basis matrix T computed by 2449 2450 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2451 2452 | 1 0 ... 0 s_1/S | 2453 | 0 1 ... 0 s_2/S | 2454 | ... | 2455 | 0 ... 1 s_{n-1}/S | 2456 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2457 2458 with S = \sum_{i=1}^n s_i^2 2459 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2460 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2461 2462 - QR decomposition of constraints otherwise 2463 */ 2464 if (qr_needed) { 2465 /* space to store Q */ 2466 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2467 /* first we issue queries for optimal work */ 2468 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2469 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2470 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2471 lqr_work = -1; 2472 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2473 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2474 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2475 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2476 lgqr_work = -1; 2477 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2478 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2479 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2480 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2481 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2482 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2483 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2484 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2485 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2486 /* array to store scaling factors for reflectors */ 2487 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2488 /* array to store rhs and solution of triangular solver */ 2489 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2490 /* allocating workspace for check */ 2491 if (pcbddc->dbg_flag) { 2492 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2493 } 2494 } 2495 /* array to store whether a node is primal or not */ 2496 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2497 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2498 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2499 if (i != total_primal_vertices) { 2500 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2501 } 2502 for (i=0;i<total_primal_vertices;i++) { 2503 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2504 } 2505 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2506 2507 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2508 /* -> using implicit ordering contained in temp_indices data */ 2509 total_counts = pcbddc->n_vertices; 2510 primal_counter = total_counts; 2511 while (total_counts<pcbddc->local_primal_size) { 2512 primal_dofs = 1; 2513 if (PetscBTLookup(change_basis,total_counts)) { 2514 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2515 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2516 primal_dofs++; 2517 } 2518 /* get constraint info */ 2519 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2520 dual_dofs = size_of_constraint-primal_dofs; 2521 2522 if (pcbddc->dbg_flag) { 2523 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2524 } 2525 2526 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2527 2528 /* copy quadrature constraints for change of basis check */ 2529 if (pcbddc->dbg_flag) { 2530 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2531 } 2532 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2533 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2534 2535 /* compute QR decomposition of constraints */ 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2538 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2539 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2540 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2541 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2542 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2543 2544 /* explictly compute R^-T */ 2545 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2546 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2547 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2548 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2550 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2552 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2553 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2554 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2555 2556 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2557 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2558 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2559 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2561 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2562 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2563 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2564 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2565 2566 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2567 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2568 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2569 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2572 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2573 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2575 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2576 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2577 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2578 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2579 2580 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2581 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2582 /* insert cols for primal dofs */ 2583 for (j=0;j<primal_dofs;j++) { 2584 start_vals = &qr_basis[j*size_of_constraint]; 2585 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2586 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2587 } 2588 /* insert cols for dual dofs */ 2589 for (j=0,k=0;j<dual_dofs;k++) { 2590 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2591 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2592 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2593 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2594 j++; 2595 } 2596 } 2597 2598 /* check change of basis */ 2599 if (pcbddc->dbg_flag) { 2600 PetscInt ii,jj; 2601 PetscBool valid_qr=PETSC_TRUE; 2602 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2604 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2605 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2606 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2607 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2608 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2609 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2610 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2611 for (jj=0;jj<size_of_constraint;jj++) { 2612 for (ii=0;ii<primal_dofs;ii++) { 2613 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2614 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2615 } 2616 } 2617 if (!valid_qr) { 2618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2619 for (jj=0;jj<size_of_constraint;jj++) { 2620 for (ii=0;ii<primal_dofs;ii++) { 2621 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2622 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2623 } 2624 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2625 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2626 } 2627 } 2628 } 2629 } else { 2630 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2631 } 2632 } 2633 } else { /* simple transformation block */ 2634 PetscInt row,col; 2635 PetscScalar val,norm; 2636 2637 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2638 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2639 for (j=0;j<size_of_constraint;j++) { 2640 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2641 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2642 if (!PetscBTLookup(is_primal,row_B)) { 2643 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2644 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2645 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2646 } else { 2647 for (k=0;k<size_of_constraint;k++) { 2648 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2649 if (row != col) { 2650 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2651 } else { 2652 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2653 } 2654 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2655 } 2656 } 2657 } 2658 if (pcbddc->dbg_flag) { 2659 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2660 } 2661 } 2662 /* increment primal counter */ 2663 primal_counter += primal_dofs; 2664 } else { 2665 if (pcbddc->dbg_flag) { 2666 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2667 } 2668 } 2669 /* increment constraint counter total_counts */ 2670 total_counts += primal_dofs; 2671 } 2672 2673 /* free workspace */ 2674 if (qr_needed) { 2675 if (pcbddc->dbg_flag) { 2676 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2677 } 2678 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2679 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2680 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2681 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2682 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2683 } 2684 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2685 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2687 2688 /* assembling of global change of variable */ 2689 { 2690 Mat tmat; 2691 PetscInt bs; 2692 2693 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2694 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2695 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2696 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2697 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2698 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2699 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2700 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2701 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2702 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2703 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2704 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2705 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2706 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2707 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2708 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2709 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2710 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2711 } 2712 /* check */ 2713 if (pcbddc->dbg_flag) { 2714 PetscReal error; 2715 Vec x,x_change; 2716 2717 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2718 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2719 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2720 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2721 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2722 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2723 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2724 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2725 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2726 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2727 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2728 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2729 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2730 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2731 ierr = VecDestroy(&x);CHKERRQ(ierr); 2732 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2733 } 2734 2735 /* adapt sub_schurs computed (if any) */ 2736 if (pcbddc->use_deluxe_scaling) { 2737 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2738 if (sub_schurs->n_subs_par_g) { 2739 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented"); 2740 } 2741 if (sub_schurs->S_Ej_all) { 2742 Mat S_1,S_2,tmat; 2743 IS is_all_N; 2744 2745 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2746 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2747 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2748 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2749 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2750 sub_schurs->S_Ej_all = S_1; 2751 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2752 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2753 sub_schurs->sum_S_Ej_all = S_2; 2754 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2755 } 2756 } 2757 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2758 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2759 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2760 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2761 } 2762 2763 /* set up change of basis context */ 2764 if (pcbddc->ChangeOfBasisMatrix) { 2765 PCBDDCChange_ctx change_ctx; 2766 2767 if (!pcbddc->new_global_mat) { 2768 PetscInt global_size,local_size; 2769 2770 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2771 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2772 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2773 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2774 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2775 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2776 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2777 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2778 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2779 } else { 2780 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2781 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2782 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2783 } 2784 if (!pcbddc->user_ChangeOfBasisMatrix) { 2785 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2786 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2787 } else { 2788 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2789 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2790 } 2791 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2792 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2793 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2794 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2795 } 2796 2797 /* get indices in local ordering for vertices and constraints */ 2798 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2799 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2800 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2801 } 2802 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2803 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2804 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2805 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2806 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2807 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2808 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2809 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2810 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2811 /* set quantities in PCBDDC data struct */ 2812 pcbddc->n_actual_vertices = i; 2813 /* check if a new primal space has been introduced */ 2814 pcbddc->new_primal_space_local = PETSC_TRUE; 2815 if (olocal_primal_size == pcbddc->local_primal_size) { 2816 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2817 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2818 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2819 } 2820 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2821 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2822 2823 /* flush dbg viewer */ 2824 if (pcbddc->dbg_flag) { 2825 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2826 } 2827 2828 /* free workspace */ 2829 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2830 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2831 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2832 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2833 if (!pcbddc->adaptive_selection) { 2834 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2835 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2836 } else { 2837 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2838 pcbddc->adaptive_constraints_ptrs, 2839 pcbddc->adaptive_constraints_idxs, 2840 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2841 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2842 } 2843 PetscFunctionReturn(0); 2844 } 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2848 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2849 { 2850 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2851 PC_IS *pcis = (PC_IS*)pc->data; 2852 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2853 PetscInt ierr,i,vertex_size; 2854 PetscViewer viewer=pcbddc->dbg_viewer; 2855 2856 PetscFunctionBegin; 2857 /* Reset previously computed graph */ 2858 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2859 /* Init local Graph struct */ 2860 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2861 2862 /* Check validity of the csr graph passed in by the user */ 2863 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2864 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2865 } 2866 2867 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2868 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2869 PetscInt *xadj,*adjncy; 2870 PetscInt nvtxs; 2871 2872 if (pcbddc->use_local_adj) { 2873 PetscBool flg_row=PETSC_FALSE; 2874 2875 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2876 if (flg_row) { 2877 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2878 pcbddc->computed_rowadj = PETSC_TRUE; 2879 } 2880 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2881 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2882 IS is_dummy; 2883 ISLocalToGlobalMapping l2gmap_dummy; 2884 PetscInt j,sum; 2885 PetscInt *cxadj,*cadjncy; 2886 const PetscInt *idxs; 2887 PCBDDCGraph graph; 2888 PetscBT is_on_boundary; 2889 2890 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2891 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2892 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2893 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2894 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2895 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2896 graph->xadj = xadj; 2897 graph->adjncy = adjncy; 2898 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2899 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2900 2901 if (pcbddc->dbg_flag) { 2902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2903 for (i=0;i<graph->ncc;i++) { 2904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2905 } 2906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2907 } 2908 2909 ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr); 2910 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2911 for (i=0;i<pcis->n_B;i++) { 2912 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2913 } 2914 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2915 2916 ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr); 2917 sum = 0; 2918 for (i=0;i<graph->ncc;i++) { 2919 PetscInt sizecc = 0; 2920 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2921 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2922 sizecc++; 2923 } 2924 } 2925 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2926 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2927 cxadj[graph->queue[j]] = sizecc; 2928 } 2929 } 2930 sum += sizecc*sizecc; 2931 } 2932 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2933 sum = 0; 2934 for (i=0;i<nvtxs;i++) { 2935 PetscInt temp = cxadj[i]; 2936 cxadj[i] = sum; 2937 sum += temp; 2938 } 2939 cxadj[nvtxs] = sum; 2940 for (i=0;i<graph->ncc;i++) { 2941 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2942 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2943 PetscInt k,sizecc = 0; 2944 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2945 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2946 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2947 sizecc++; 2948 } 2949 } 2950 } 2951 } 2952 } 2953 if (nvtxs) { 2954 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2955 } else { 2956 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2957 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2958 } 2959 graph->xadj = 0; 2960 graph->adjncy = 0; 2961 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2962 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2963 } 2964 } 2965 2966 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2967 vertex_size = 1; 2968 if (pcbddc->user_provided_isfordofs) { 2969 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2970 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2971 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2972 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2973 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2974 } 2975 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2976 pcbddc->n_ISForDofs = 0; 2977 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2978 } 2979 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2980 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2981 } else { 2982 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2983 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2984 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2985 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2986 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2987 } 2988 } 2989 } 2990 2991 /* Setup of Graph */ 2992 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2993 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2994 } 2995 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2996 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2997 } 2998 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 2999 3000 /* Graph's connected components analysis */ 3001 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3002 3003 /* print some info to stdout */ 3004 if (pcbddc->dbg_flag) { 3005 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 3006 } 3007 3008 /* mark topography has done */ 3009 pcbddc->recompute_topography = PETSC_FALSE; 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3015 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3016 { 3017 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3018 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3019 PetscErrorCode ierr; 3020 3021 PetscFunctionBegin; 3022 n = 0; 3023 vertices = 0; 3024 if (pcbddc->ConstraintMatrix) { 3025 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3026 for (i=0;i<local_primal_size;i++) { 3027 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3028 if (size_of_constraint == 1) n++; 3029 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3030 } 3031 if (vertices_idx) { 3032 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3033 n = 0; 3034 for (i=0;i<local_primal_size;i++) { 3035 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3036 if (size_of_constraint == 1) { 3037 vertices[n++]=row_cmat_indices[0]; 3038 } 3039 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3040 } 3041 } 3042 } 3043 *n_vertices = n; 3044 if (vertices_idx) *vertices_idx = vertices; 3045 PetscFunctionReturn(0); 3046 } 3047 3048 #undef __FUNCT__ 3049 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3050 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3051 { 3052 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3053 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3054 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3055 PetscBT touched; 3056 PetscErrorCode ierr; 3057 3058 /* This function assumes that the number of local constraints per connected component 3059 is not greater than the number of nodes defined for the connected component 3060 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3061 PetscFunctionBegin; 3062 n = 0; 3063 constraints_index = 0; 3064 if (pcbddc->ConstraintMatrix) { 3065 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3066 max_size_of_constraint = 0; 3067 for (i=0;i<local_primal_size;i++) { 3068 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3069 if (size_of_constraint > 1) { 3070 n++; 3071 } 3072 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3073 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3074 } 3075 if (constraints_idx) { 3076 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3077 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3078 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3079 n = 0; 3080 for (i=0;i<local_primal_size;i++) { 3081 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3082 if (size_of_constraint > 1) { 3083 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3084 /* find first untouched local node */ 3085 j = 0; 3086 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3087 min_index = row_cmat_global_indices[j]; 3088 min_loc = j; 3089 /* search the minimum among nodes not yet touched on the connected component 3090 since there can be more than one constraint on a single cc */ 3091 for (j=1;j<size_of_constraint;j++) { 3092 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3093 min_index = row_cmat_global_indices[j]; 3094 min_loc = j; 3095 } 3096 } 3097 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3098 constraints_index[n++] = row_cmat_indices[min_loc]; 3099 } 3100 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3101 } 3102 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3103 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3104 } 3105 } 3106 *n_constraints = n; 3107 if (constraints_idx) *constraints_idx = constraints_index; 3108 PetscFunctionReturn(0); 3109 } 3110 3111 #undef __FUNCT__ 3112 #define __FUNCT__ "PCBDDCSubsetNumbering" 3113 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3114 { 3115 Vec local_vec,global_vec; 3116 IS seqis,paris; 3117 VecScatter scatter_ctx; 3118 PetscScalar *array; 3119 PetscInt *temp_global_dofs; 3120 PetscScalar globalsum; 3121 PetscInt i,j,s; 3122 PetscInt nlocals,first_index,old_index,max_local; 3123 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 3124 PetscMPIInt *dof_sizes,*dof_displs; 3125 PetscBool first_found; 3126 PetscErrorCode ierr; 3127 3128 PetscFunctionBegin; 3129 /* mpi buffers */ 3130 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3131 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3132 j = ( !rank_prec_comm ? size_prec_comm : 0); 3133 ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr); 3134 ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr); 3135 /* get maximum size of subset */ 3136 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3137 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3138 max_local = 0; 3139 for (i=0;i<n_local_dofs;i++) { 3140 if (max_local < temp_global_dofs[i] ) { 3141 max_local = temp_global_dofs[i]; 3142 } 3143 } 3144 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3145 max_global++; 3146 max_local = 0; 3147 for (i=0;i<n_local_dofs;i++) { 3148 if (max_local < local_dofs[i] ) { 3149 max_local = local_dofs[i]; 3150 } 3151 } 3152 max_local++; 3153 /* allocate workspace */ 3154 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3155 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3156 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3157 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3158 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3159 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3160 /* create scatter */ 3161 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3162 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3163 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3164 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3165 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3166 /* init array */ 3167 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3168 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3169 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3170 if (local_dofs_mult) { 3171 for (i=0;i<n_local_dofs;i++) { 3172 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3173 } 3174 } else { 3175 for (i=0;i<n_local_dofs;i++) { 3176 array[local_dofs[i]]=1.0; 3177 } 3178 } 3179 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3180 /* scatter into global vec and get total number of global dofs */ 3181 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3182 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3183 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3184 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3185 /* Fill global_vec with cumulative function for global numbering */ 3186 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3187 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3188 nlocals = 0; 3189 first_index = -1; 3190 first_found = PETSC_FALSE; 3191 for (i=0;i<s;i++) { 3192 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3193 first_found = PETSC_TRUE; 3194 first_index = i; 3195 } 3196 nlocals += (PetscInt)PetscRealPart(array[i]); 3197 } 3198 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3199 if (!rank_prec_comm) { 3200 dof_displs[0]=0; 3201 for (i=1;i<size_prec_comm;i++) { 3202 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3203 } 3204 } 3205 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3206 if (first_found) { 3207 array[first_index] += (PetscScalar)nlocals; 3208 old_index = first_index; 3209 for (i=first_index+1;i<s;i++) { 3210 if (PetscRealPart(array[i]) > 0.1) { 3211 array[i] += array[old_index]; 3212 old_index = i; 3213 } 3214 } 3215 } 3216 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3217 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3218 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3219 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3220 /* get global ordering of local dofs */ 3221 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3222 if (local_dofs_mult) { 3223 for (i=0;i<n_local_dofs;i++) { 3224 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3225 } 3226 } else { 3227 for (i=0;i<n_local_dofs;i++) { 3228 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3229 } 3230 } 3231 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3232 /* free workspace */ 3233 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3234 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3235 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3236 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 3237 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 3238 /* return pointer to global ordering of local dofs */ 3239 *global_numbering_subset = temp_global_dofs; 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3245 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3246 { 3247 PetscInt i,j; 3248 PetscScalar *alphas; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 /* this implements stabilized Gram-Schmidt */ 3253 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3254 for (i=0;i<n;i++) { 3255 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3256 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3257 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3258 } 3259 ierr = PetscFree(alphas);CHKERRQ(ierr); 3260 PetscFunctionReturn(0); 3261 } 3262 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatISGetSubassemblingPattern" 3265 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3266 { 3267 Mat subdomain_adj; 3268 IS new_ranks,ranks_send_to; 3269 MatPartitioning partitioner; 3270 Mat_IS *matis; 3271 PetscInt n_neighs,*neighs,*n_shared,**shared; 3272 PetscInt prank; 3273 PetscMPIInt size,rank,color; 3274 PetscInt *xadj,*adjncy,*oldranks; 3275 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3276 PetscInt i,local_size,threshold=0; 3277 PetscErrorCode ierr; 3278 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3279 PetscSubcomm subcomm; 3280 3281 PetscFunctionBegin; 3282 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3283 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3284 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3285 3286 /* Get info on mapping */ 3287 matis = (Mat_IS*)(mat->data); 3288 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3289 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3290 3291 /* build local CSR graph of subdomains' connectivity */ 3292 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3293 xadj[0] = 0; 3294 xadj[1] = PetscMax(n_neighs-1,0); 3295 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3296 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3297 3298 if (threshold) { 3299 PetscInt xadj_count = 0; 3300 for (i=1;i<n_neighs;i++) { 3301 if (n_shared[i] > threshold) { 3302 adjncy[xadj_count] = neighs[i]; 3303 adjncy_wgt[xadj_count] = n_shared[i]; 3304 xadj_count++; 3305 } 3306 } 3307 xadj[1] = xadj_count; 3308 } else { 3309 if (xadj[1]) { 3310 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3311 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3312 } 3313 } 3314 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3315 if (use_square) { 3316 for (i=0;i<xadj[1];i++) { 3317 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3318 } 3319 } 3320 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3321 3322 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3323 3324 /* 3325 Restrict work on active processes only. 3326 */ 3327 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3328 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3329 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3330 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3331 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3332 if (color) { 3333 ierr = PetscFree(xadj);CHKERRQ(ierr); 3334 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3335 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3336 } else { 3337 PetscInt coarsening_ratio; 3338 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3339 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3340 prank = rank; 3341 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3342 /* 3343 for (i=0;i<size;i++) { 3344 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3345 } 3346 */ 3347 for (i=0;i<xadj[1];i++) { 3348 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3349 } 3350 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3351 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3352 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3353 3354 /* Partition */ 3355 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3356 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3357 if (use_vwgt) { 3358 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3359 v_wgt[0] = local_size; 3360 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3361 } 3362 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3363 coarsening_ratio = size/n_subdomains; 3364 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3365 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3366 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3367 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3368 3369 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3370 if (contiguous) { 3371 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3372 } else { 3373 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3374 } 3375 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3376 /* clean up */ 3377 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3378 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3379 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3380 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3381 } 3382 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3383 3384 /* assemble parallel IS for sends */ 3385 i = 1; 3386 if (color) i=0; 3387 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3388 3389 /* get back IS */ 3390 *is_sends = ranks_send_to; 3391 PetscFunctionReturn(0); 3392 } 3393 3394 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3395 3396 #undef __FUNCT__ 3397 #define __FUNCT__ "MatISSubassemble" 3398 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3399 { 3400 Mat local_mat; 3401 Mat_IS *matis; 3402 IS is_sends_internal; 3403 PetscInt rows,cols,new_local_rows; 3404 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3405 PetscBool ismatis,isdense,newisdense,destroy_mat; 3406 ISLocalToGlobalMapping l2gmap; 3407 PetscInt* l2gmap_indices; 3408 const PetscInt* is_indices; 3409 MatType new_local_type; 3410 /* buffers */ 3411 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3412 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3413 PetscInt *recv_buffer_idxs_local; 3414 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3415 /* MPI */ 3416 MPI_Comm comm,comm_n; 3417 PetscSubcomm subcomm; 3418 PetscMPIInt n_sends,n_recvs,commsize; 3419 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3420 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3421 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3422 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3423 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3424 PetscErrorCode ierr; 3425 3426 PetscFunctionBegin; 3427 /* TODO: add missing checks */ 3428 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3429 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3430 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3431 PetscValidLogicalCollectiveInt(mat,nis,7); 3432 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3433 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3434 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3435 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3436 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3437 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3438 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3439 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3440 PetscInt mrows,mcols,mnrows,mncols; 3441 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3442 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3443 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3444 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3445 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3446 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3447 } 3448 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3449 PetscValidLogicalCollectiveInt(mat,bs,0); 3450 /* prepare IS for sending if not provided */ 3451 if (!is_sends) { 3452 PetscBool pcontig = PETSC_TRUE; 3453 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3454 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3455 } else { 3456 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3457 is_sends_internal = is_sends; 3458 } 3459 3460 /* get pointer of MATIS data */ 3461 matis = (Mat_IS*)mat->data; 3462 3463 /* get comm */ 3464 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3465 3466 /* compute number of sends */ 3467 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3468 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3469 3470 /* compute number of receives */ 3471 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3472 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3473 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3474 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3475 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3476 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3477 ierr = PetscFree(iflags);CHKERRQ(ierr); 3478 3479 /* restrict comm if requested */ 3480 subcomm = 0; 3481 destroy_mat = PETSC_FALSE; 3482 if (restrict_comm) { 3483 PetscMPIInt color,subcommsize; 3484 3485 color = 0; 3486 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3487 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3488 subcommsize = commsize - subcommsize; 3489 /* check if reuse has been requested */ 3490 if (reuse == MAT_REUSE_MATRIX) { 3491 if (*mat_n) { 3492 PetscMPIInt subcommsize2; 3493 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3494 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3495 comm_n = PetscObjectComm((PetscObject)*mat_n); 3496 } else { 3497 comm_n = PETSC_COMM_SELF; 3498 } 3499 } else { /* MAT_INITIAL_MATRIX */ 3500 PetscMPIInt rank; 3501 3502 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3503 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3504 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3505 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3506 comm_n = PetscSubcommChild(subcomm); 3507 } 3508 /* flag to destroy *mat_n if not significative */ 3509 if (color) destroy_mat = PETSC_TRUE; 3510 } else { 3511 comm_n = comm; 3512 } 3513 3514 /* prepare send/receive buffers */ 3515 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3516 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3517 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3518 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3519 if (nis) { 3520 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3521 } 3522 3523 /* Get data from local matrices */ 3524 if (!isdense) { 3525 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3526 /* TODO: See below some guidelines on how to prepare the local buffers */ 3527 /* 3528 send_buffer_vals should contain the raw values of the local matrix 3529 send_buffer_idxs should contain: 3530 - MatType_PRIVATE type 3531 - PetscInt size_of_l2gmap 3532 - PetscInt global_row_indices[size_of_l2gmap] 3533 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3534 */ 3535 } else { 3536 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3537 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3538 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3539 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3540 send_buffer_idxs[1] = i; 3541 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3542 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3543 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3544 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3545 for (i=0;i<n_sends;i++) { 3546 ilengths_vals[is_indices[i]] = len*len; 3547 ilengths_idxs[is_indices[i]] = len+2; 3548 } 3549 } 3550 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3551 /* additional is (if any) */ 3552 if (nis) { 3553 PetscMPIInt psum; 3554 PetscInt j; 3555 for (j=0,psum=0;j<nis;j++) { 3556 PetscInt plen; 3557 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3558 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3559 psum += len+1; /* indices + lenght */ 3560 } 3561 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3562 for (j=0,psum=0;j<nis;j++) { 3563 PetscInt plen; 3564 const PetscInt *is_array_idxs; 3565 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3566 send_buffer_idxs_is[psum] = plen; 3567 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3568 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3569 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3570 psum += plen+1; /* indices + lenght */ 3571 } 3572 for (i=0;i<n_sends;i++) { 3573 ilengths_idxs_is[is_indices[i]] = psum; 3574 } 3575 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3576 } 3577 3578 buf_size_idxs = 0; 3579 buf_size_vals = 0; 3580 buf_size_idxs_is = 0; 3581 for (i=0;i<n_recvs;i++) { 3582 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3583 buf_size_vals += (PetscInt)olengths_vals[i]; 3584 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3585 } 3586 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3587 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3588 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3589 3590 /* get new tags for clean communications */ 3591 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3592 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3593 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3594 3595 /* allocate for requests */ 3596 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3597 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3598 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3599 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3600 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3601 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3602 3603 /* communications */ 3604 ptr_idxs = recv_buffer_idxs; 3605 ptr_vals = recv_buffer_vals; 3606 ptr_idxs_is = recv_buffer_idxs_is; 3607 for (i=0;i<n_recvs;i++) { 3608 source_dest = onodes[i]; 3609 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3610 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3611 ptr_idxs += olengths_idxs[i]; 3612 ptr_vals += olengths_vals[i]; 3613 if (nis) { 3614 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3615 ptr_idxs_is += olengths_idxs_is[i]; 3616 } 3617 } 3618 for (i=0;i<n_sends;i++) { 3619 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3620 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3621 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3622 if (nis) { 3623 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3624 } 3625 } 3626 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3627 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3628 3629 /* assemble new l2g map */ 3630 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3631 ptr_idxs = recv_buffer_idxs; 3632 new_local_rows = 0; 3633 for (i=0;i<n_recvs;i++) { 3634 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3635 ptr_idxs += olengths_idxs[i]; 3636 } 3637 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3638 ptr_idxs = recv_buffer_idxs; 3639 new_local_rows = 0; 3640 for (i=0;i<n_recvs;i++) { 3641 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3642 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3643 ptr_idxs += olengths_idxs[i]; 3644 } 3645 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3646 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3647 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3648 3649 /* infer new local matrix type from received local matrices type */ 3650 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3651 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3652 if (n_recvs) { 3653 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3654 ptr_idxs = recv_buffer_idxs; 3655 for (i=0;i<n_recvs;i++) { 3656 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3657 new_local_type_private = MATAIJ_PRIVATE; 3658 break; 3659 } 3660 ptr_idxs += olengths_idxs[i]; 3661 } 3662 switch (new_local_type_private) { 3663 case MATDENSE_PRIVATE: 3664 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3665 new_local_type = MATSEQAIJ; 3666 bs = 1; 3667 } else { /* if I receive only 1 dense matrix */ 3668 new_local_type = MATSEQDENSE; 3669 bs = 1; 3670 } 3671 break; 3672 case MATAIJ_PRIVATE: 3673 new_local_type = MATSEQAIJ; 3674 bs = 1; 3675 break; 3676 case MATBAIJ_PRIVATE: 3677 new_local_type = MATSEQBAIJ; 3678 break; 3679 case MATSBAIJ_PRIVATE: 3680 new_local_type = MATSEQSBAIJ; 3681 break; 3682 default: 3683 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3684 break; 3685 } 3686 } else { /* by default, new_local_type is seqdense */ 3687 new_local_type = MATSEQDENSE; 3688 bs = 1; 3689 } 3690 3691 /* create MATIS object if needed */ 3692 if (reuse == MAT_INITIAL_MATRIX) { 3693 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3694 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3695 } else { 3696 /* it also destroys the local matrices */ 3697 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3698 } 3699 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3700 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3701 3702 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3703 3704 /* Global to local map of received indices */ 3705 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3706 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3707 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3708 3709 /* restore attributes -> type of incoming data and its size */ 3710 buf_size_idxs = 0; 3711 for (i=0;i<n_recvs;i++) { 3712 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3713 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3714 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3715 } 3716 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3717 3718 /* set preallocation */ 3719 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3720 if (!newisdense) { 3721 PetscInt *new_local_nnz=0; 3722 3723 ptr_vals = recv_buffer_vals; 3724 ptr_idxs = recv_buffer_idxs_local; 3725 if (n_recvs) { 3726 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3727 } 3728 for (i=0;i<n_recvs;i++) { 3729 PetscInt j; 3730 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3731 for (j=0;j<*(ptr_idxs+1);j++) { 3732 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3733 } 3734 } else { 3735 /* TODO */ 3736 } 3737 ptr_idxs += olengths_idxs[i]; 3738 } 3739 if (new_local_nnz) { 3740 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3741 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3742 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3743 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3744 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3745 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3746 } else { 3747 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3748 } 3749 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3750 } else { 3751 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3752 } 3753 3754 /* set values */ 3755 ptr_vals = recv_buffer_vals; 3756 ptr_idxs = recv_buffer_idxs_local; 3757 for (i=0;i<n_recvs;i++) { 3758 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3759 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3760 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3761 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3762 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3763 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3764 } else { 3765 /* TODO */ 3766 } 3767 ptr_idxs += olengths_idxs[i]; 3768 ptr_vals += olengths_vals[i]; 3769 } 3770 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3771 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3772 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3773 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3774 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3775 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3776 3777 #if 0 3778 if (!restrict_comm) { /* check */ 3779 Vec lvec,rvec; 3780 PetscReal infty_error; 3781 3782 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3783 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3784 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3785 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3786 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3787 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3788 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3789 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3790 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3791 } 3792 #endif 3793 3794 /* assemble new additional is (if any) */ 3795 if (nis) { 3796 PetscInt **temp_idxs,*count_is,j,psum; 3797 3798 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3799 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3800 ptr_idxs = recv_buffer_idxs_is; 3801 psum = 0; 3802 for (i=0;i<n_recvs;i++) { 3803 for (j=0;j<nis;j++) { 3804 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3805 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3806 psum += plen; 3807 ptr_idxs += plen+1; /* shift pointer to received data */ 3808 } 3809 } 3810 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3811 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3812 for (i=1;i<nis;i++) { 3813 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3814 } 3815 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3816 ptr_idxs = recv_buffer_idxs_is; 3817 for (i=0;i<n_recvs;i++) { 3818 for (j=0;j<nis;j++) { 3819 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3820 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3821 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3822 ptr_idxs += plen+1; /* shift pointer to received data */ 3823 } 3824 } 3825 for (i=0;i<nis;i++) { 3826 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3827 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3828 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3829 } 3830 ierr = PetscFree(count_is);CHKERRQ(ierr); 3831 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3832 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3833 } 3834 /* free workspace */ 3835 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3836 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3837 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3838 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3839 if (isdense) { 3840 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3841 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3842 } else { 3843 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3844 } 3845 if (nis) { 3846 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3847 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3848 } 3849 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3850 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3851 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3852 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3853 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3854 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3855 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3856 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3857 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3858 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3859 ierr = PetscFree(onodes);CHKERRQ(ierr); 3860 if (nis) { 3861 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3862 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3863 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3864 } 3865 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3866 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3867 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3868 for (i=0;i<nis;i++) { 3869 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3870 } 3871 } 3872 PetscFunctionReturn(0); 3873 } 3874 3875 /* temporary hack into ksp private data structure */ 3876 #include <petsc-private/kspimpl.h> 3877 3878 #undef __FUNCT__ 3879 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3880 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3881 { 3882 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3883 PC_IS *pcis = (PC_IS*)pc->data; 3884 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3885 MatNullSpace CoarseNullSpace=NULL; 3886 ISLocalToGlobalMapping coarse_islg; 3887 IS coarse_is,*isarray; 3888 PetscInt i,im_active=-1,active_procs=-1; 3889 PetscInt nis,nisdofs,nisneu; 3890 PC pc_temp; 3891 PCType coarse_pc_type; 3892 KSPType coarse_ksp_type; 3893 PetscBool multilevel_requested,multilevel_allowed; 3894 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3895 Mat t_coarse_mat_is; 3896 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3897 PetscMPIInt all_procs; 3898 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3899 PetscBool compute_vecs = PETSC_FALSE; 3900 PetscScalar *array; 3901 PetscErrorCode ierr; 3902 3903 PetscFunctionBegin; 3904 /* Assign global numbering to coarse dofs */ 3905 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3906 PetscInt ocoarse_size; 3907 compute_vecs = PETSC_TRUE; 3908 ocoarse_size = pcbddc->coarse_size; 3909 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3910 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3911 /* see if we can avoid some work */ 3912 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3913 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3914 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3915 coarse_reuse = PETSC_FALSE; 3916 } else { /* we can safely reuse already computed coarse matrix */ 3917 coarse_reuse = PETSC_TRUE; 3918 } 3919 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3920 coarse_reuse = PETSC_FALSE; 3921 } 3922 /* reset any subassembling information */ 3923 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3924 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3925 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3926 coarse_reuse = PETSC_TRUE; 3927 } 3928 3929 /* count "active" (i.e. with positive local size) and "void" processes */ 3930 im_active = !!(pcis->n); 3931 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3932 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3933 void_procs = all_procs-active_procs; 3934 csin_type_simple = PETSC_TRUE; 3935 redist = PETSC_FALSE; 3936 if (pcbddc->current_level && void_procs) { 3937 csin_ml = PETSC_TRUE; 3938 ncoarse_ml = void_procs; 3939 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3940 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3941 csin_ds = PETSC_TRUE; 3942 ncoarse_ds = pcbddc->redistribute_coarse; 3943 redist = PETSC_TRUE; 3944 } else { 3945 csin_ds = PETSC_TRUE; 3946 ncoarse_ds = active_procs; 3947 redist = PETSC_TRUE; 3948 } 3949 } else { 3950 csin_ml = PETSC_FALSE; 3951 ncoarse_ml = all_procs; 3952 if (void_procs) { 3953 csin_ds = PETSC_TRUE; 3954 ncoarse_ds = void_procs; 3955 csin_type_simple = PETSC_FALSE; 3956 } else { 3957 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3958 csin_ds = PETSC_TRUE; 3959 ncoarse_ds = pcbddc->redistribute_coarse; 3960 redist = PETSC_TRUE; 3961 } else { 3962 csin_ds = PETSC_FALSE; 3963 ncoarse_ds = all_procs; 3964 } 3965 } 3966 } 3967 3968 /* 3969 test if we can go multilevel: three conditions must be satisfied: 3970 - we have not exceeded the number of levels requested 3971 - we can actually subassemble the active processes 3972 - we can find a suitable number of MPI processes where we can place the subassembled problem 3973 */ 3974 multilevel_allowed = PETSC_FALSE; 3975 multilevel_requested = PETSC_FALSE; 3976 if (pcbddc->current_level < pcbddc->max_levels) { 3977 multilevel_requested = PETSC_TRUE; 3978 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3979 multilevel_allowed = PETSC_FALSE; 3980 } else { 3981 multilevel_allowed = PETSC_TRUE; 3982 } 3983 } 3984 /* determine number of process partecipating to coarse solver */ 3985 if (multilevel_allowed) { 3986 ncoarse = ncoarse_ml; 3987 csin = csin_ml; 3988 redist = PETSC_FALSE; 3989 } else { 3990 ncoarse = ncoarse_ds; 3991 csin = csin_ds; 3992 } 3993 3994 /* creates temporary l2gmap and IS for coarse indexes */ 3995 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3996 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3997 3998 /* creates temporary MATIS object for coarse matrix */ 3999 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4000 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4001 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4002 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4003 #if 0 4004 { 4005 PetscViewer viewer; 4006 char filename[256]; 4007 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4008 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4009 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4010 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4011 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4012 } 4013 #endif 4014 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4015 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4016 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4017 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4018 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4019 4020 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4021 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4022 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4023 const PetscInt *idxs; 4024 ISLocalToGlobalMapping tmap; 4025 4026 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4027 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4028 /* allocate space for temporary storage */ 4029 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4030 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4031 /* allocate for IS array */ 4032 nisdofs = pcbddc->n_ISForDofsLocal; 4033 nisneu = !!pcbddc->NeumannBoundariesLocal; 4034 nis = nisdofs + nisneu; 4035 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4036 /* dofs splitting */ 4037 for (i=0;i<nisdofs;i++) { 4038 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4039 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4040 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4041 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4042 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4043 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4044 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4045 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4046 } 4047 /* neumann boundaries */ 4048 if (pcbddc->NeumannBoundariesLocal) { 4049 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4050 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4051 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4052 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4053 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4054 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4055 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4056 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4057 } 4058 /* free memory */ 4059 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4060 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4061 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4062 } else { 4063 nis = 0; 4064 nisdofs = 0; 4065 nisneu = 0; 4066 isarray = NULL; 4067 } 4068 /* destroy no longer needed map */ 4069 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4070 4071 /* restrict on coarse candidates (if needed) */ 4072 coarse_mat_is = NULL; 4073 if (csin) { 4074 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4075 if (redist) { 4076 PetscMPIInt rank; 4077 PetscInt spc,n_spc_p1,dest[1],destsize; 4078 4079 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4080 spc = active_procs/ncoarse; 4081 n_spc_p1 = active_procs%ncoarse; 4082 if (im_active) { 4083 destsize = 1; 4084 if (rank > n_spc_p1*(spc+1)-1) { 4085 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4086 } else { 4087 dest[0] = rank/(spc+1); 4088 } 4089 } else { 4090 destsize = 0; 4091 } 4092 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4093 } else if (csin_type_simple) { 4094 PetscMPIInt rank; 4095 PetscInt issize,isidx; 4096 4097 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4098 if (im_active) { 4099 issize = 1; 4100 isidx = (PetscInt)rank; 4101 } else { 4102 issize = 0; 4103 isidx = -1; 4104 } 4105 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4106 } else { /* get a suitable subassembling pattern from MATIS code */ 4107 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4108 } 4109 4110 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4111 if (!redist || ncoarse <= void_procs) { 4112 PetscInt ncoarse_cand,tissize,*nisindices; 4113 PetscInt *coarse_candidates; 4114 const PetscInt* tisindices; 4115 4116 /* get coarse candidates' ranks in pc communicator */ 4117 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4118 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4119 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4120 if (!coarse_candidates[i]) { 4121 coarse_candidates[ncoarse_cand++]=i; 4122 } 4123 } 4124 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4125 4126 4127 if (pcbddc->dbg_flag) { 4128 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4129 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4130 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4131 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4132 for (i=0;i<ncoarse_cand;i++) { 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4134 } 4135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4136 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4137 } 4138 /* shift the pattern on coarse candidates */ 4139 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4140 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4141 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4142 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4143 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4144 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4145 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4146 } 4147 if (pcbddc->dbg_flag) { 4148 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4150 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4151 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4152 } 4153 } 4154 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4155 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4156 } else { 4157 if (pcbddc->dbg_flag) { 4158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4161 } 4162 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4163 coarse_mat_is = t_coarse_mat_is; 4164 } 4165 4166 /* create local to global scatters for coarse problem */ 4167 if (compute_vecs) { 4168 PetscInt lrows; 4169 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4170 if (coarse_mat_is) { 4171 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4172 } else { 4173 lrows = 0; 4174 } 4175 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4176 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4177 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4178 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4179 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4180 } 4181 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4182 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4183 4184 /* set defaults for coarse KSP and PC */ 4185 if (multilevel_allowed) { 4186 coarse_ksp_type = KSPRICHARDSON; 4187 coarse_pc_type = PCBDDC; 4188 } else { 4189 coarse_ksp_type = KSPPREONLY; 4190 coarse_pc_type = PCREDUNDANT; 4191 } 4192 4193 /* print some info if requested */ 4194 if (pcbddc->dbg_flag) { 4195 if (!multilevel_allowed) { 4196 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4197 if (multilevel_requested) { 4198 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4199 } else if (pcbddc->max_levels) { 4200 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4201 } 4202 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4203 } 4204 } 4205 4206 /* create the coarse KSP object only once with defaults */ 4207 if (coarse_mat_is) { 4208 MatReuse coarse_mat_reuse; 4209 PetscViewer dbg_viewer = NULL; 4210 if (pcbddc->dbg_flag) { 4211 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4212 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4213 } 4214 if (!pcbddc->coarse_ksp) { 4215 char prefix[256],str_level[16]; 4216 size_t len; 4217 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4218 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4219 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4220 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4221 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4222 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4223 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4224 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4225 /* prefix */ 4226 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4227 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4228 if (!pcbddc->current_level) { 4229 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4230 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4231 } else { 4232 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4233 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4234 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4235 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4236 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4237 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4238 } 4239 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4240 /* allow user customization */ 4241 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4242 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4243 } 4244 4245 /* get some info after set from options */ 4246 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4247 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4248 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4249 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4250 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4251 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4252 isbddc = PETSC_FALSE; 4253 } 4254 if (isredundant) { 4255 KSP inner_ksp; 4256 PC inner_pc; 4257 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4258 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4259 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4260 } 4261 4262 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4263 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4264 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4265 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4266 if (nisdofs) { 4267 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4268 for (i=0;i<nisdofs;i++) { 4269 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4270 } 4271 } 4272 if (nisneu) { 4273 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4274 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4275 } 4276 4277 /* assemble coarse matrix */ 4278 if (coarse_reuse) { 4279 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4280 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4281 coarse_mat_reuse = MAT_REUSE_MATRIX; 4282 } else { 4283 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4284 } 4285 if (isbddc || isnn) { 4286 if (pcbddc->coarsening_ratio > 1) { 4287 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4288 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4289 if (pcbddc->dbg_flag) { 4290 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4291 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4292 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4293 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4294 } 4295 } 4296 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4297 } else { 4298 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4299 coarse_mat = coarse_mat_is; 4300 } 4301 } else { 4302 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4303 } 4304 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4305 4306 /* propagate symmetry info to coarse matrix */ 4307 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4308 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4309 4310 /* set operators */ 4311 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4312 if (pcbddc->dbg_flag) { 4313 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4314 } 4315 } else { /* processes non partecipating to coarse solver (if any) */ 4316 coarse_mat = 0; 4317 } 4318 ierr = PetscFree(isarray);CHKERRQ(ierr); 4319 #if 0 4320 { 4321 PetscViewer viewer; 4322 char filename[256]; 4323 sprintf(filename,"coarse_mat.m"); 4324 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4325 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4326 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4327 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4328 } 4329 #endif 4330 4331 /* Compute coarse null space (special handling by BDDC only) */ 4332 if (pcbddc->NullSpace) { 4333 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4334 } 4335 4336 if (pcbddc->coarse_ksp) { 4337 Vec crhs,csol; 4338 PetscBool ispreonly; 4339 if (CoarseNullSpace) { 4340 if (isbddc) { 4341 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4342 } else { 4343 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4344 } 4345 } 4346 /* setup coarse ksp */ 4347 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4348 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4349 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4350 /* hack */ 4351 if (!csol) { 4352 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4353 } 4354 if (!crhs) { 4355 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4356 } 4357 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4358 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4359 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4360 KSP check_ksp; 4361 KSPType check_ksp_type; 4362 PC check_pc; 4363 Vec check_vec,coarse_vec; 4364 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4365 PetscInt its; 4366 PetscBool compute_eigs; 4367 PetscReal *eigs_r,*eigs_c; 4368 PetscInt neigs; 4369 const char *prefix; 4370 4371 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4372 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4373 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4374 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4375 if (ispreonly) { 4376 check_ksp_type = KSPPREONLY; 4377 compute_eigs = PETSC_FALSE; 4378 } else { 4379 check_ksp_type = KSPGMRES; 4380 compute_eigs = PETSC_TRUE; 4381 } 4382 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4383 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4384 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4385 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4386 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4387 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4388 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4389 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4390 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4391 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4392 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4393 /* create random vec */ 4394 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4395 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4396 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4397 if (CoarseNullSpace) { 4398 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4399 } 4400 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4401 /* solve coarse problem */ 4402 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4403 if (CoarseNullSpace) { 4404 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4405 } 4406 /* set eigenvalue estimation if preonly has not been requested */ 4407 if (compute_eigs) { 4408 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4409 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4410 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4411 lambda_max = eigs_r[neigs-1]; 4412 lambda_min = eigs_r[0]; 4413 if (pcbddc->use_coarse_estimates) { 4414 if (lambda_max>lambda_min) { 4415 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4416 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4417 } 4418 } 4419 } 4420 4421 /* check coarse problem residual error */ 4422 if (pcbddc->dbg_flag) { 4423 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4424 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4425 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4426 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4427 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4428 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4429 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4430 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4431 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4432 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4433 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4434 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4435 if (compute_eigs) { 4436 PetscReal lambda_max_s,lambda_min_s; 4437 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4438 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4439 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4440 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4441 for (i=0;i<neigs;i++) { 4442 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4443 } 4444 } 4445 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4446 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4447 } 4448 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4449 if (compute_eigs) { 4450 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4451 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4452 } 4453 } 4454 } 4455 /* print additional info */ 4456 if (pcbddc->dbg_flag) { 4457 /* waits until all processes reaches this point */ 4458 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4459 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4460 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4461 } 4462 4463 /* free memory */ 4464 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4465 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4466 PetscFunctionReturn(0); 4467 } 4468 4469 #undef __FUNCT__ 4470 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4471 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4472 { 4473 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4474 PC_IS* pcis = (PC_IS*)pc->data; 4475 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4476 PetscInt i,coarse_size; 4477 PetscInt *local_primal_indices; 4478 PetscErrorCode ierr; 4479 4480 PetscFunctionBegin; 4481 /* Compute global number of coarse dofs */ 4482 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4483 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4484 } 4485 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4486 4487 /* check numbering */ 4488 if (pcbddc->dbg_flag) { 4489 PetscScalar coarsesum,*array; 4490 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4491 4492 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4493 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4494 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4495 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4496 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4497 for (i=0;i<pcbddc->local_primal_size;i++) { 4498 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4499 } 4500 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4501 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4502 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4503 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4504 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4505 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4506 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4507 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4508 for (i=0;i<pcis->n;i++) { 4509 if (array[i] == 1.0) { 4510 set_error = PETSC_TRUE; 4511 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4512 } 4513 } 4514 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4515 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4516 for (i=0;i<pcis->n;i++) { 4517 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4518 } 4519 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4520 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4521 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4522 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4523 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4524 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4525 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4526 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4527 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4528 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4529 for (i=0;i<pcbddc->local_primal_size;i++) { 4530 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 4531 } 4532 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4533 } 4534 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4535 if (set_error_reduced) { 4536 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4537 } 4538 } 4539 /* get back data */ 4540 *coarse_size_n = coarse_size; 4541 *local_primal_indices_n = local_primal_indices; 4542 PetscFunctionReturn(0); 4543 } 4544 4545 #undef __FUNCT__ 4546 #define __FUNCT__ "PCBDDCGlobalToLocal" 4547 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4548 { 4549 IS localis_t; 4550 PetscInt i,lsize,*idxs,n; 4551 PetscScalar *vals; 4552 PetscErrorCode ierr; 4553 4554 PetscFunctionBegin; 4555 /* get indices in local ordering exploiting local to global map */ 4556 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4557 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4558 for (i=0;i<lsize;i++) vals[i] = 1.0; 4559 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4560 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4561 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4562 if (idxs) { /* multilevel guard */ 4563 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4564 } 4565 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4566 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4567 ierr = PetscFree(vals);CHKERRQ(ierr); 4568 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4569 /* now compute set in local ordering */ 4570 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4571 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4572 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4573 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4574 for (i=0,lsize=0;i<n;i++) { 4575 if (PetscRealPart(vals[i]) > 0.5) { 4576 lsize++; 4577 } 4578 } 4579 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4580 for (i=0,lsize=0;i<n;i++) { 4581 if (PetscRealPart(vals[i]) > 0.5) { 4582 idxs[lsize++] = i; 4583 } 4584 } 4585 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4586 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4587 *localis = localis_t; 4588 PetscFunctionReturn(0); 4589 } 4590 4591 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4592 #undef __FUNCT__ 4593 #define __FUNCT__ "PCBDDCMatMult_Private" 4594 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4595 { 4596 PCBDDCChange_ctx change_ctx; 4597 PetscErrorCode ierr; 4598 4599 PetscFunctionBegin; 4600 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4601 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4602 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4603 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4604 PetscFunctionReturn(0); 4605 } 4606 4607 #undef __FUNCT__ 4608 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4609 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4610 { 4611 PCBDDCChange_ctx change_ctx; 4612 PetscErrorCode ierr; 4613 4614 PetscFunctionBegin; 4615 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4616 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4617 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4618 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4619 PetscFunctionReturn(0); 4620 } 4621 4622 #undef __FUNCT__ 4623 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4624 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4625 { 4626 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4627 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4628 PetscInt *used_xadj,*used_adjncy; 4629 PetscBool free_used_adj; 4630 PetscErrorCode ierr; 4631 4632 PetscFunctionBegin; 4633 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4634 free_used_adj = PETSC_FALSE; 4635 if (pcbddc->sub_schurs_layers == -1) { 4636 used_xadj = NULL; 4637 used_adjncy = NULL; 4638 } else { 4639 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4640 used_xadj = pcbddc->mat_graph->xadj; 4641 used_adjncy = pcbddc->mat_graph->adjncy; 4642 } else if (pcbddc->computed_rowadj) { 4643 used_xadj = pcbddc->mat_graph->xadj; 4644 used_adjncy = pcbddc->mat_graph->adjncy; 4645 } else { 4646 PetscBool flg_row=PETSC_FALSE; 4647 const PetscInt *xadj,*adjncy; 4648 PetscInt nvtxs; 4649 4650 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4651 if (flg_row) { 4652 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4653 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4654 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4655 free_used_adj = PETSC_TRUE; 4656 } else { 4657 pcbddc->sub_schurs_layers = -1; 4658 used_xadj = NULL; 4659 used_adjncy = NULL; 4660 } 4661 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4662 } 4663 } 4664 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4665 4666 /* free adjacency */ 4667 if (free_used_adj) { 4668 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4669 } 4670 PetscFunctionReturn(0); 4671 } 4672 4673 #undef __FUNCT__ 4674 #define __FUNCT__ "PCBDDCInitSubSchurs" 4675 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4676 { 4677 PC_IS *pcis=(PC_IS*)pc->data; 4678 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4679 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4680 PCBDDCGraph graph; 4681 Mat S_j; 4682 PetscErrorCode ierr; 4683 4684 PetscFunctionBegin; 4685 /* attach interface graph for determining subsets */ 4686 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4687 IS verticesIS; 4688 4689 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4690 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4691 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4692 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4693 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4694 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4695 /* 4696 if (pcbddc->dbg_flag) { 4697 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4698 } 4699 */ 4700 } else { 4701 graph = pcbddc->mat_graph; 4702 } 4703 4704 /* Create Schur complement matrix */ 4705 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4706 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4707 4708 /* sub_schurs init */ 4709 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr); 4710 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4711 /* free graph struct */ 4712 if (pcbddc->sub_schurs_rebuild) { 4713 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4714 } 4715 PetscFunctionReturn(0); 4716 } 4717