1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 60 nmax = PetscMax(nmin,nmax); 61 62 /* allocate lapack workspace */ 63 cum = cum2 = 0; 64 maxneigs = 0; 65 for (i=0;i<sub_schurs->n_subs;i++) { 66 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 67 PetscInt n,subset_size; 68 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 69 n = PetscMin(subset_size,nmax); 70 cum += subset_size*n; 71 cum2 += n; 72 maxneigs = PetscMax(maxneigs,n); 73 } 74 } 75 76 if (mss) { 77 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 78 PetscBLASInt B_itype = 1; 79 PetscBLASInt B_N = mss; 80 PetscReal zero = 0.0; 81 PetscReal eps = 0.0; /* dlamch? */ 82 83 B_lwork = -1; 84 S = NULL; 85 St = NULL; 86 eigs = NULL; 87 eigv = NULL; 88 B_iwork = NULL; 89 B_ifail = NULL; 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 108 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 112 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 113 #if defined(PETSC_USE_COMPLEX) 114 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 115 #endif 116 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 117 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 118 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 119 nv+cum,&pcbddc->adaptive_constraints_idxs, 120 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 121 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 122 123 maxneigs = 0; 124 cum = cum2 = cumarray = 0; 125 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 126 const PetscInt *idxs; 127 128 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 for (cum=0;cum<nv;cum++) { 130 pcbddc->adaptive_constraints_n[cum] = 1; 131 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 132 pcbddc->adaptive_constraints_ptrs[cum] = cum; 133 pcbddc->adaptive_constraints_data[cum] = 1.0; 134 } 135 cum2 = cum; 136 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 } 138 139 if (mss) { /* multilevel */ 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 141 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 142 } 143 144 for (i=0;i<sub_schurs->n_subs;i++) { 145 PetscInt j,subset_size; 146 147 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 148 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 149 const PetscInt *idxs; 150 PetscScalar one = 1.0,scalar_zero = 0.0; 151 PetscReal zero=0.0; 152 PetscBLASInt B_N; 153 154 /* S should be copied since we need it for deluxe scaling */ 155 if (sub_schurs->is_hermitian) { 156 PetscInt j; 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 for (j=0;j<subset_size;j++) { 161 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 162 } 163 } else { 164 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 /* is always this the right matrix? */ 168 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 170 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 171 /* St = Starray+cumarray; */ 172 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 173 if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */ 174 thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold); 175 } else { 176 thresh = 1.0; 177 } 178 179 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 180 PetscBLASInt B_itype = 1; 181 PetscBLASInt B_IL = 1, B_IU; 182 PetscReal eps = -1.0; /* dlamch? */ 183 PetscInt nmin_s; 184 185 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 186 /* ask for eigenvalues lower than thresh */ 187 if (pcbddc->dbg_flag) { 188 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 189 } 190 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 191 #if defined(PETSC_USE_COMPLEX) 192 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 193 #else 194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 195 #endif 196 ierr = PetscFPTrapPop();CHKERRQ(ierr); 197 if (B_ierr) { 198 if (B_ierr < 0 ) { 199 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 200 } else if (B_ierr <= B_N) { 201 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 202 } else { 203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 204 } 205 } 206 207 if (B_neigs > nmax) { 208 if (pcbddc->dbg_flag) { 209 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 210 } 211 B_neigs = nmax; 212 } 213 214 nmin_s = PetscMin(nmin,B_N); 215 if (B_neigs < nmin_s) { 216 PetscBLASInt B_neigs2; 217 218 B_IL = B_neigs + 1; 219 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 220 if (pcbddc->dbg_flag) { 221 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 222 } 223 if (sub_schurs->is_hermitian) { 224 PetscInt j; 225 for (j=0;j<subset_size;j++) { 226 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 227 } 228 for (j=0;j<subset_size;j++) { 229 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 230 } 231 } else { 232 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 233 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 236 #if defined(PETSC_USE_COMPLEX) 237 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 238 #else 239 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 240 #endif 241 ierr = PetscFPTrapPop();CHKERRQ(ierr); 242 B_neigs += B_neigs2; 243 } 244 if (B_ierr) { 245 if (B_ierr < 0 ) { 246 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 247 } else if (B_ierr <= B_N) { 248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 249 } else { 250 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 251 } 252 } 253 if (pcbddc->dbg_flag) { 254 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 255 for (j=0;j<B_neigs;j++) { 256 if (eigs[j] == 0.0) { 257 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 258 } else { 259 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 260 } 261 } 262 } 263 } else { 264 /* TODO */ 265 } 266 267 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 268 if (B_neigs) { 269 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 270 } 271 maxneigs = PetscMax(B_neigs,maxneigs); 272 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 273 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 274 275 if (pcbddc->dbg_flag > 1) { 276 PetscInt ii; 277 for (ii=0;ii<B_neigs;ii++) { 278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 279 for (j=0;j<B_N;j++) { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 281 } 282 } 283 } 284 for (j=0;j<B_neigs;j++) { 285 #if 0 286 { 287 PetscBLASInt Blas_N,Blas_one = 1.0; 288 PetscScalar norm; 289 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 290 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 291 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 292 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 293 } else { 294 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 295 } 296 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 297 } 298 #endif 299 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 300 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 301 cum2 += subset_size; 302 } 303 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 304 } 305 /* shift for next computation */ 306 cumarray += subset_size*subset_size; 307 } 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 } 311 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 312 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 313 314 if (mss) { 315 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 316 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 317 } 318 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 319 #if defined(PETSC_USE_COMPLEX) 320 ierr = PetscFree(rwork);CHKERRQ(ierr); 321 #endif 322 if (pcbddc->dbg_flag) { 323 PetscInt maxneigs_r; 324 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 325 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 326 } 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "PCBDDCSetUpSolvers" 332 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 333 { 334 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 335 PetscScalar *coarse_submat_vals; 336 PetscErrorCode ierr; 337 338 PetscFunctionBegin; 339 /* Setup local scatters R_to_B and (optionally) R_to_D */ 340 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 341 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 342 343 /* Setup local neumann solver ksp_R */ 344 /* PCBDDCSetUpLocalScatters should be called first! */ 345 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 346 347 /* Change global null space passed in by the user if change of basis has been requested */ 348 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 349 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 350 } 351 352 /* 353 Setup local correction and local part of coarse basis. 354 Gives back the dense local part of the coarse matrix in column major ordering 355 */ 356 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 357 358 /* Compute total number of coarse nodes and setup coarse solver */ 359 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 360 361 /* free */ 362 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "PCBDDCResetCustomization" 368 PetscErrorCode PCBDDCResetCustomization(PC pc) 369 { 370 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 375 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 376 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 377 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 378 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 379 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 380 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 381 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 383 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 384 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetTopography" 390 PetscErrorCode PCBDDCResetTopography(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 397 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 398 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 399 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetSolvers" 406 PetscErrorCode PCBDDCResetSolvers(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscScalar *array; 410 PetscErrorCode ierr; 411 412 PetscFunctionBegin; 413 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 414 if (pcbddc->coarse_phi_B) { 415 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 416 ierr = PetscFree(array);CHKERRQ(ierr); 417 } 418 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 421 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 422 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 423 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 424 if (pcbddc->local_auxmat2) { 425 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 426 ierr = PetscFree(array);CHKERRQ(ierr); 427 } 428 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 430 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 432 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 433 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 434 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 435 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 436 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 437 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 438 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 439 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 440 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 441 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 442 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 443 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 449 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 450 { 451 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 452 PC_IS *pcis = (PC_IS*)pc->data; 453 VecType impVecType; 454 PetscInt n_constraints,n_R,old_size; 455 PetscErrorCode ierr; 456 457 PetscFunctionBegin; 458 if (!pcbddc->ConstraintMatrix) { 459 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 460 } 461 /* get sizes */ 462 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 463 n_R = pcis->n-pcbddc->n_actual_vertices; 464 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 465 /* local work vectors (try to avoid unneeded work)*/ 466 /* R nodes */ 467 old_size = -1; 468 if (pcbddc->vec1_R) { 469 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 470 } 471 if (n_R != old_size) { 472 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 473 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 474 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 475 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 476 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 477 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 478 } 479 /* local primal dofs */ 480 old_size = -1; 481 if (pcbddc->vec1_P) { 482 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 483 } 484 if (pcbddc->local_primal_size != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 486 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 487 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 488 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 489 } 490 /* local explicit constraints */ 491 old_size = -1; 492 if (pcbddc->vec1_C) { 493 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 494 } 495 if (n_constraints && n_constraints != old_size) { 496 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 497 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 498 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 499 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCBDDCSetUpCorrection" 506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 507 { 508 PetscErrorCode ierr; 509 /* pointers to pcis and pcbddc */ 510 PC_IS* pcis = (PC_IS*)pc->data; 511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 512 /* submatrices of local problem */ 513 Mat A_RV,A_VR,A_VV; 514 /* submatrices of local coarse problem */ 515 Mat S_VV,S_CV,S_VC,S_CC; 516 /* working matrices */ 517 Mat C_CR; 518 /* additional working stuff */ 519 PC pc_R; 520 Mat F; 521 PetscBool isLU,isCHOL,isILU; 522 523 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 524 PetscScalar *work; 525 PetscInt *idx_V_B; 526 PetscInt n,n_vertices,n_constraints; 527 PetscInt i,n_R,n_D,n_B; 528 PetscBool unsymmetric_check; 529 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 530 MatType impMatType; 531 /* some shortcuts to scalars */ 532 PetscScalar one=1.0,m_one=-1.0; 533 534 PetscFunctionBegin; 535 /* get number of vertices (corners plus constraints with change of basis) 536 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 537 n_vertices = pcbddc->n_actual_vertices; 538 n_constraints = pcbddc->local_primal_size-n_vertices; 539 /* Set Non-overlapping dimensions */ 540 n_B = pcis->n_B; n_D = pcis->n - n_B; 541 n_R = pcis->n-n_vertices; 542 543 /* Set types for local objects needed by BDDC precondtioner */ 544 impMatType = MATSEQDENSE; 545 546 /* vertices in boundary numbering */ 547 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 548 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 549 if (i != n_vertices) { 550 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 551 } 552 553 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 554 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 556 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 558 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 563 564 unsymmetric_check = PETSC_FALSE; 565 /* allocate workspace */ 566 n = 0; 567 if (n_constraints) { 568 n += n_R*n_constraints; 569 } 570 if (n_vertices) { 571 n = PetscMax(2*n_R*n_vertices,n); 572 } 573 if (!pcbddc->issym) { 574 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 575 unsymmetric_check = PETSC_TRUE; 576 } 577 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 578 579 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 580 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 581 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 582 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 583 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 584 if (isLU || isILU || isCHOL) { 585 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 586 } else { 587 F = NULL; 588 } 589 590 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 591 if (n_constraints) { 592 Mat M1,M2,M3; 593 IS is_aux; 594 /* see if we can save some allocations */ 595 if (pcbddc->local_auxmat2) { 596 PetscInt on_R,on_constraints; 597 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 598 if (on_R != n_R || on_constraints != n_constraints) { 599 PetscScalar *marray; 600 601 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 602 ierr = PetscFree(marray);CHKERRQ(ierr); 603 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 605 } 606 } 607 /* auxiliary matrices */ 608 if (!pcbddc->local_auxmat2) { 609 PetscScalar *marray; 610 611 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 marray += n_R*n_constraints; 614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 615 } 616 617 /* Extract constraints on R nodes: C_{CR} */ 618 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 619 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 620 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 621 622 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 623 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 624 for (i=0;i<n_constraints;i++) { 625 const PetscScalar *row_cmat_values; 626 const PetscInt *row_cmat_indices; 627 PetscInt size_of_constraint,j; 628 629 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 630 for (j=0;j<size_of_constraint;j++) { 631 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 632 } 633 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 634 } 635 if (F) { 636 Mat B; 637 638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 639 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 640 ierr = MatDestroy(&B);CHKERRQ(ierr); 641 } else { 642 PetscScalar *xarray; 643 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 652 } 653 654 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 655 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 656 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 657 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 658 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 659 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 660 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 661 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 662 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 663 ierr = MatDestroy(&M2);CHKERRQ(ierr); 664 ierr = MatDestroy(&M3);CHKERRQ(ierr); 665 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 666 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 667 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 668 ierr = MatDestroy(&M1);CHKERRQ(ierr); 669 } 670 /* Get submatrices from subdomain matrix */ 671 if (n_vertices) { 672 Mat newmat; 673 IS is_aux; 674 PetscInt ibs,mbs; 675 PetscBool issbaij; 676 677 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 678 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 679 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 680 if (ibs != mbs) { /* need to convert to SEQAIJ */ 681 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 682 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 683 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 684 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 685 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 686 } else { 687 /* this is safe */ 688 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 690 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 691 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 692 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 693 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 694 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 695 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 696 } else { 697 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 698 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 699 } 700 } 701 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 702 } 703 704 /* Matrix of coarse basis functions (local) */ 705 if (pcbddc->coarse_phi_B) { 706 PetscInt on_B,on_primal,on_D=n_D; 707 if (pcbddc->coarse_phi_D) { 708 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 709 } 710 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 711 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 712 PetscScalar *marray; 713 714 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 715 ierr = PetscFree(marray);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 720 } 721 } 722 723 if (!pcbddc->coarse_phi_B) { 724 PetscScalar *marray; 725 726 n = n_B*pcbddc->local_primal_size; 727 if (pcbddc->switch_static || pcbddc->dbg_flag) { 728 n += n_D*pcbddc->local_primal_size; 729 } 730 if (!pcbddc->issym) { 731 n *= 2; 732 } 733 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 735 n = n_B*pcbddc->local_primal_size; 736 if (pcbddc->switch_static || pcbddc->dbg_flag) { 737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->issym) { 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 743 n = n_B*pcbddc->local_primal_size; 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 745 } 746 } else { 747 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 748 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 749 if (pcbddc->switch_static || pcbddc->dbg_flag) { 750 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 751 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 752 } 753 } 754 } 755 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 756 /* vertices */ 757 if (n_vertices) { 758 759 if (n_R) { 760 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 761 PetscBLASInt B_N,B_one = 1; 762 PetscScalar *x,*y; 763 764 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 766 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 767 if (F) { 768 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 769 } else { 770 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 771 for (i=0;i<n_vertices;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 773 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 774 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 775 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 776 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 777 } 778 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 779 } 780 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 781 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 782 if (n_constraints) { 783 Mat B; 784 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 786 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 787 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 788 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 789 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 790 ierr = MatDestroy(&B);CHKERRQ(ierr); 791 } 792 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 793 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 794 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 795 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 796 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 797 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 798 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 799 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 800 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 801 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 802 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 803 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 804 } else { 805 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 806 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 807 } 808 /* coarse basis functions */ 809 for (i=0;i<n_vertices;i++) { 810 PetscScalar *y; 811 812 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 813 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 814 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 y[n_B*i+idx_V_B[i]] = 1.0; 818 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 819 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 820 821 if (pcbddc->switch_static || pcbddc->dbg_flag) { 822 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 823 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 824 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 827 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 828 } 829 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 830 } 831 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 832 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 833 } 834 835 if (n_constraints) { 836 Mat B; 837 838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 839 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 841 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 842 if (n_vertices) { 843 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 844 } 845 ierr = MatDestroy(&B);CHKERRQ(ierr); 846 /* coarse basis functions */ 847 for (i=0;i<n_constraints;i++) { 848 PetscScalar *y; 849 850 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 851 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 852 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 853 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 854 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 856 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 857 858 if (pcbddc->switch_static || pcbddc->dbg_flag) { 859 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 860 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 864 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 865 } 866 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 867 } 868 } 869 870 /* compute other basis functions for non-symmetric problems */ 871 if (!pcbddc->issym) { 872 Mat B,X; 873 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 875 876 if (n_constraints) { 877 Mat S_CCT,B_C; 878 879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 880 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 881 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 882 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 883 if (n_vertices) { 884 Mat B_V,S_VCT; 885 886 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 887 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 888 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 889 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 891 } 892 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 893 } 894 if (n_vertices && n_R) { 895 Mat A_VRT; 896 PetscBLASInt B_N,B_one = 1; 897 898 if (!n_constraints) { /* if there are no constraints, reset work */ 899 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 900 } 901 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 902 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 903 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 904 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 905 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 906 } 907 908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 909 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 910 for (i=0;i<pcbddc->local_primal_size;i++) { 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 912 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 913 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 914 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 915 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 916 } 917 } else { 918 for (i=0;i<pcbddc->local_primal_size;i++) { 919 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 921 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 923 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 924 } 925 } 926 ierr = MatDestroy(&B);CHKERRQ(ierr); 927 /* coarse basis functions */ 928 for (i=0;i<pcbddc->local_primal_size;i++) { 929 PetscScalar *y; 930 931 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 932 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 933 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 934 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 if (i<n_vertices) { 937 y[n_B*i+idx_V_B[i]] = 1.0; 938 } 939 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 940 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 941 942 if (pcbddc->switch_static || pcbddc->dbg_flag) { 943 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 944 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 945 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 948 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 949 } 950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 951 } 952 ierr = MatDestroy(&X);CHKERRQ(ierr); 953 } 954 955 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 956 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 957 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 958 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 959 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 960 /* Checking coarse_sub_mat and coarse basis functios */ 961 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 962 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 963 if (pcbddc->dbg_flag) { 964 Mat coarse_sub_mat; 965 Mat AUXMAT,TM1,TM2,TM3,TM4; 966 Mat coarse_phi_D,coarse_phi_B; 967 Mat coarse_psi_D,coarse_psi_B; 968 Mat A_II,A_BB,A_IB,A_BI; 969 Mat C_B,CPHI; 970 IS is_dummy; 971 Vec mones; 972 MatType checkmattype=MATSEQAIJ; 973 PetscReal real_value; 974 975 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 976 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 977 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 979 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 980 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 981 if (unsymmetric_check) { 982 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 983 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 984 } 985 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 986 987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 990 if (unsymmetric_check) { 991 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 992 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 993 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 994 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 995 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 996 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 997 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 998 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 999 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1001 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1002 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1003 } else { 1004 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1005 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1006 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1007 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1010 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1011 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1012 } 1013 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1014 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1015 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1017 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1018 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1019 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1021 1022 /* check constraints */ 1023 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1024 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1025 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1026 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1027 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1028 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1029 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1031 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1032 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1033 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1034 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1035 1036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1037 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1038 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1039 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1040 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1041 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1042 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1043 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1044 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1045 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1046 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1047 if (unsymmetric_check) { 1048 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1049 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1050 } 1051 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1052 } 1053 1054 /* free memory */ 1055 ierr = PetscFree(work);CHKERRQ(ierr); 1056 if (n_vertices) { 1057 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1058 } 1059 if (n_constraints) { 1060 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1061 } 1062 /* get back data */ 1063 *coarse_submat_vals_n = coarse_submat_vals; 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1069 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1070 { 1071 Mat *work_mat; 1072 IS isrow_s,iscol_s; 1073 PetscBool rsorted,csorted; 1074 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1075 PetscErrorCode ierr; 1076 1077 PetscFunctionBegin; 1078 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1079 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1080 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1081 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1082 1083 if (!rsorted) { 1084 const PetscInt *idxs; 1085 PetscInt *idxs_sorted,i; 1086 1087 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1088 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1089 for (i=0;i<rsize;i++) { 1090 idxs_perm_r[i] = i; 1091 } 1092 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1093 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1094 for (i=0;i<rsize;i++) { 1095 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1096 } 1097 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1098 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1099 } else { 1100 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1101 isrow_s = isrow; 1102 } 1103 1104 if (!csorted) { 1105 if (isrow == iscol) { 1106 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1107 iscol_s = isrow_s; 1108 } else { 1109 const PetscInt *idxs; 1110 PetscInt *idxs_sorted,i; 1111 1112 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1113 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1114 for (i=0;i<csize;i++) { 1115 idxs_perm_c[i] = i; 1116 } 1117 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1119 for (i=0;i<csize;i++) { 1120 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1121 } 1122 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1123 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1124 } 1125 } else { 1126 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1127 iscol_s = iscol; 1128 } 1129 1130 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1131 1132 if (!rsorted || !csorted) { 1133 Mat new_mat; 1134 IS is_perm_r,is_perm_c; 1135 1136 if (!rsorted) { 1137 PetscInt *idxs_r,i; 1138 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1139 for (i=0;i<rsize;i++) { 1140 idxs_r[idxs_perm_r[i]] = i; 1141 } 1142 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1143 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1144 } else { 1145 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1146 } 1147 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1148 1149 if (!csorted) { 1150 if (isrow_s == iscol_s) { 1151 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1152 is_perm_c = is_perm_r; 1153 } else { 1154 PetscInt *idxs_c,i; 1155 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1156 for (i=0;i<csize;i++) { 1157 idxs_c[idxs_perm_c[i]] = i; 1158 } 1159 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1160 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1161 } 1162 } else { 1163 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1164 } 1165 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1166 1167 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1168 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1169 work_mat[0] = new_mat; 1170 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1171 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1172 } 1173 1174 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1175 *B = work_mat[0]; 1176 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1177 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1178 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1184 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1185 { 1186 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1188 Mat new_mat; 1189 IS is_local,is_global; 1190 PetscInt local_size; 1191 PetscBool isseqaij; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1197 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1198 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1199 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1200 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1201 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1202 1203 /* check */ 1204 if (pcbddc->dbg_flag) { 1205 Vec x,x_change; 1206 PetscReal error; 1207 1208 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1209 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1210 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1211 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1214 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1216 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1217 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1220 ierr = VecDestroy(&x);CHKERRQ(ierr); 1221 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1222 } 1223 1224 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1225 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1226 if (isseqaij) { 1227 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1228 } else { 1229 Mat work_mat; 1230 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1231 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1232 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1233 } 1234 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1235 #if !defined(PETSC_USE_COMPLEX) 1236 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1237 #endif 1238 /* 1239 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1240 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1241 */ 1242 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1248 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1249 { 1250 PC_IS* pcis = (PC_IS*)(pc->data); 1251 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1252 IS is_aux1,is_aux2; 1253 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1254 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1255 PetscInt vbs,bs; 1256 PetscBT bitmask; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 /* 1261 No need to setup local scatters if 1262 - primal space is unchanged 1263 AND 1264 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1265 AND 1266 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1267 */ 1268 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1269 PetscFunctionReturn(0); 1270 } 1271 /* destroy old objects */ 1272 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1273 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1274 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1275 /* Set Non-overlapping dimensions */ 1276 n_B = pcis->n_B; n_D = pcis->n - n_B; 1277 n_vertices = pcbddc->n_actual_vertices; 1278 /* create auxiliary bitmask */ 1279 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1280 for (i=0;i<n_vertices;i++) { 1281 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1282 } 1283 1284 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1285 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1286 for (i=0, n_R=0; i<pcis->n; i++) { 1287 if (!PetscBTLookup(bitmask,i)) { 1288 idx_R_local[n_R] = i; 1289 n_R++; 1290 } 1291 } 1292 1293 /* Block code */ 1294 vbs = 1; 1295 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1296 if (bs>1 && !(n_vertices%bs)) { 1297 PetscBool is_blocked = PETSC_TRUE; 1298 PetscInt *vary; 1299 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1300 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1301 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1302 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1303 for (i=0; i<n_vertices/bs; i++) { 1304 if (vary[i]!=0 && vary[i]!=bs) { 1305 is_blocked = PETSC_FALSE; 1306 break; 1307 } 1308 } 1309 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1310 vbs = bs; 1311 for (i=0;i<n_R/vbs;i++) { 1312 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1313 } 1314 } 1315 ierr = PetscFree(vary);CHKERRQ(ierr); 1316 } 1317 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1318 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1319 1320 /* print some info if requested */ 1321 if (pcbddc->dbg_flag) { 1322 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1326 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1330 } 1331 1332 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1333 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1334 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1335 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1336 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1337 for (i=0; i<n_D; i++) { 1338 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1339 } 1340 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1341 for (i=0, j=0; i<n_R; i++) { 1342 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1343 aux_array1[j++] = i; 1344 } 1345 } 1346 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1347 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1348 for (i=0, j=0; i<n_B; i++) { 1349 if (!PetscBTLookup(bitmask,is_indices[i])) { 1350 aux_array2[j++] = i; 1351 } 1352 } 1353 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1355 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1356 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1357 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1358 1359 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1360 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1361 for (i=0, j=0; i<n_R; i++) { 1362 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1363 aux_array1[j++] = i; 1364 } 1365 } 1366 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1367 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1368 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1369 } 1370 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1371 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1378 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1379 { 1380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1381 PC_IS *pcis = (PC_IS*)pc->data; 1382 PC pc_temp; 1383 Mat A_RR; 1384 MatReuse reuse; 1385 PetscScalar m_one = -1.0; 1386 PetscReal value; 1387 PetscInt n_D,n_R,ibs,mbs; 1388 PetscBool use_exact,use_exact_reduced,issbaij; 1389 PetscErrorCode ierr; 1390 /* prefixes stuff */ 1391 char dir_prefix[256],neu_prefix[256],str_level[16]; 1392 size_t len; 1393 1394 PetscFunctionBegin; 1395 1396 /* compute prefixes */ 1397 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1398 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1399 if (!pcbddc->current_level) { 1400 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1401 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1402 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1403 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1406 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1407 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1408 len -= 15; /* remove "pc_bddc_coarse_" */ 1409 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1410 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1411 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1412 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1413 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1414 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1415 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1416 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1417 } 1418 1419 /* DIRICHLET PROBLEM */ 1420 if (dirichlet) { 1421 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1422 if (pcbddc->issym) { 1423 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1424 } 1425 /* Matrix for Dirichlet problem is pcis->A_II */ 1426 n_D = pcis->n - pcis->n_B; 1427 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1428 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1429 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1430 /* default */ 1431 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1432 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1434 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1435 if (issbaij) { 1436 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1437 } else { 1438 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1439 } 1440 /* Allow user's customization */ 1441 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1442 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1443 } 1444 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1445 if (sub_schurs->interior_solver) { 1446 ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr); 1447 } 1448 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1449 if (!n_D) { 1450 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1451 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1452 } 1453 /* Set Up KSP for Dirichlet problem of BDDC */ 1454 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1455 /* set ksp_D into pcis data */ 1456 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1457 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1458 pcis->ksp_D = pcbddc->ksp_D; 1459 } 1460 1461 /* NEUMANN PROBLEM */ 1462 A_RR = 0; 1463 if (neumann) { 1464 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1465 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1466 if (pcbddc->ksp_R) { /* already created ksp */ 1467 PetscInt nn_R; 1468 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1469 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1470 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1471 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1472 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1473 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1474 reuse = MAT_INITIAL_MATRIX; 1475 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1476 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1477 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1478 reuse = MAT_INITIAL_MATRIX; 1479 } else { /* safe to reuse the matrix */ 1480 reuse = MAT_REUSE_MATRIX; 1481 } 1482 } 1483 /* last check */ 1484 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1485 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1486 reuse = MAT_INITIAL_MATRIX; 1487 } 1488 } else { /* first time, so we need to create the matrix */ 1489 reuse = MAT_INITIAL_MATRIX; 1490 } 1491 /* extract A_RR */ 1492 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1493 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1494 if (ibs != mbs) { 1495 Mat newmat; 1496 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1497 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1498 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1501 } 1502 if (pcbddc->issym) { 1503 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1504 } 1505 if (!pcbddc->ksp_R) { /* create object if not present */ 1506 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1507 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1508 /* default */ 1509 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1510 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1511 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1512 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1513 if (issbaij) { 1514 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1515 } else { 1516 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1517 } 1518 /* Allow user's customization */ 1519 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1520 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1521 } 1522 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1523 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1524 if (!n_R) { 1525 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1526 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1527 } 1528 /* Set Up KSP for Neumann problem of BDDC */ 1529 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1530 } 1531 1532 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1533 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1534 if (pcbddc->dbg_flag) { 1535 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1536 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1537 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1538 } 1539 if (dirichlet) { /* Dirichlet */ 1540 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1541 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1542 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1543 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1544 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1545 /* need to be adapted? */ 1546 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1547 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1548 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1549 /* print info */ 1550 if (pcbddc->dbg_flag) { 1551 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1553 } 1554 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1555 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1556 } 1557 } 1558 if (neumann) { /* Neumann */ 1559 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1560 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1561 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1562 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1563 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1564 /* need to be adapted? */ 1565 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1566 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1567 /* print info */ 1568 if (pcbddc->dbg_flag) { 1569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1571 } 1572 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1573 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1574 } 1575 } 1576 } 1577 /* free Neumann problem's matrix */ 1578 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1584 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1585 { 1586 PetscErrorCode ierr; 1587 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1588 1589 PetscFunctionBegin; 1590 if (applytranspose) { 1591 if (pcbddc->local_auxmat1) { 1592 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1593 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1594 } 1595 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1596 } else { 1597 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1598 if (pcbddc->local_auxmat1) { 1599 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1600 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1601 } 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1609 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1610 { 1611 PetscErrorCode ierr; 1612 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1613 PC_IS* pcis = (PC_IS*) (pc->data); 1614 const PetscScalar zero = 0.0; 1615 1616 PetscFunctionBegin; 1617 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1618 if (applytranspose) { 1619 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1620 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1621 } else { 1622 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1623 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1624 } 1625 /* start communications from local primal nodes to rhs of coarse solver */ 1626 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1627 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1628 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 1630 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1631 /* TODO remove null space when doing multilevel */ 1632 if (pcbddc->coarse_ksp) { 1633 Vec rhs,sol; 1634 1635 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1636 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1637 if (applytranspose) { 1638 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1639 } else { 1640 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1641 } 1642 } 1643 1644 /* Local solution on R nodes */ 1645 if (pcis->n) { 1646 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1647 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1648 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1649 if (pcbddc->switch_static) { 1650 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1651 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1652 } 1653 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1654 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 if (pcbddc->switch_static) { 1658 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 } 1661 } 1662 1663 /* communications from coarse sol to local primal nodes */ 1664 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1665 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1666 1667 /* Sum contributions from two levels */ 1668 if (applytranspose) { 1669 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1670 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1671 } else { 1672 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1673 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1674 } 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1680 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1681 { 1682 PetscErrorCode ierr; 1683 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1684 PetscScalar *array; 1685 Vec from,to; 1686 1687 PetscFunctionBegin; 1688 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1689 from = pcbddc->coarse_vec; 1690 to = pcbddc->vec1_P; 1691 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1692 Vec tvec; 1693 1694 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1695 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1696 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1697 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1698 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1699 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1700 } 1701 } else { /* from local to global -> put data in coarse right hand side */ 1702 from = pcbddc->vec1_P; 1703 to = pcbddc->coarse_vec; 1704 } 1705 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1711 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1712 { 1713 PetscErrorCode ierr; 1714 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1715 PetscScalar *array; 1716 Vec from,to; 1717 1718 PetscFunctionBegin; 1719 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1720 from = pcbddc->coarse_vec; 1721 to = pcbddc->vec1_P; 1722 } else { /* from local to global -> put data in coarse right hand side */ 1723 from = pcbddc->vec1_P; 1724 to = pcbddc->coarse_vec; 1725 } 1726 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1727 if (smode == SCATTER_FORWARD) { 1728 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1729 Vec tvec; 1730 1731 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1732 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1733 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1734 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1735 } 1736 } else { 1737 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1738 ierr = VecResetArray(from);CHKERRQ(ierr); 1739 } 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 /* uncomment for testing purposes */ 1745 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1748 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1749 { 1750 PetscErrorCode ierr; 1751 PC_IS* pcis = (PC_IS*)(pc->data); 1752 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1753 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1754 /* one and zero */ 1755 PetscScalar one=1.0,zero=0.0; 1756 /* space to store constraints and their local indices */ 1757 PetscScalar *temp_quadrature_constraint; 1758 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1759 /* iterators */ 1760 PetscInt i,j,k,total_counts,temp_start_ptr; 1761 /* BLAS integers */ 1762 PetscBLASInt lwork,lierr; 1763 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1764 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1765 /* reuse */ 1766 PetscInt olocal_primal_size; 1767 PetscInt *oprimal_indices_local_idxs; 1768 /* change of basis */ 1769 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1770 PetscBool boolforchange,qr_needed; 1771 PetscBT touched,change_basis,qr_needed_idx; 1772 /* auxiliary stuff */ 1773 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1774 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1775 PetscScalar *temp_quadrature_constraint_work=NULL; 1776 /* some quantities */ 1777 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1778 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1779 1780 PetscFunctionBegin; 1781 /* Destroy Mat objects computed previously */ 1782 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1783 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1784 1785 /* print some info */ 1786 if (pcbddc->dbg_flag) { 1787 IS vertices; 1788 PetscInt nv,nedges,nfaces; 1789 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1790 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1791 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1792 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1793 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1796 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1797 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1798 } 1799 1800 if (!pcbddc->adaptive_selection) { 1801 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1802 MatNullSpace nearnullsp; 1803 const Vec *nearnullvecs; 1804 Vec *localnearnullsp; 1805 PetscScalar *array; 1806 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1807 PetscBool nnsp_has_cnst; 1808 /* LAPACK working arrays for SVD or POD */ 1809 PetscBool skip_lapack; 1810 PetscScalar *work; 1811 PetscReal *singular_vals; 1812 #if defined(PETSC_USE_COMPLEX) 1813 PetscReal *rwork; 1814 #endif 1815 #if defined(PETSC_MISSING_LAPACK_GESVD) 1816 PetscScalar *temp_basis,*correlation_mat; 1817 #else 1818 PetscBLASInt dummy_int=1; 1819 PetscScalar dummy_scalar=1.; 1820 #endif 1821 1822 /* Get index sets for faces, edges and vertices from graph */ 1823 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1824 /* free unneeded index sets */ 1825 if (!pcbddc->use_vertices) { 1826 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1827 } 1828 if (!pcbddc->use_edges) { 1829 for (i=0;i<n_ISForEdges;i++) { 1830 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1831 } 1832 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1833 n_ISForEdges = 0; 1834 } 1835 if (!pcbddc->use_faces) { 1836 for (i=0;i<n_ISForFaces;i++) { 1837 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1838 } 1839 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1840 n_ISForFaces = 0; 1841 } 1842 /* HACKS (the following two blocks of code) */ 1843 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1844 pcbddc->use_change_of_basis = PETSC_TRUE; 1845 if (!ISForEdges) { 1846 pcbddc->use_change_on_faces = PETSC_TRUE; 1847 } 1848 } 1849 if (pcbddc->NullSpace) { 1850 /* use_change_of_basis should be consistent among processors */ 1851 PetscBool tbool[2],gbool[2]; 1852 tbool[0] = pcbddc->use_change_of_basis; 1853 tbool[1] = pcbddc->use_change_on_faces; 1854 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1855 pcbddc->use_change_of_basis = gbool[0]; 1856 pcbddc->use_change_on_faces = gbool[1]; 1857 } 1858 1859 /* check if near null space is attached to global mat */ 1860 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1861 if (nearnullsp) { 1862 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1863 /* remove any stored info */ 1864 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1865 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1866 /* store information for BDDC solver reuse */ 1867 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1868 pcbddc->onearnullspace = nearnullsp; 1869 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1870 for (i=0;i<nnsp_size;i++) { 1871 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1872 } 1873 } else { /* if near null space is not provided BDDC uses constants by default */ 1874 nnsp_size = 0; 1875 nnsp_has_cnst = PETSC_TRUE; 1876 } 1877 /* get max number of constraints on a single cc */ 1878 max_constraints = nnsp_size; 1879 if (nnsp_has_cnst) max_constraints++; 1880 1881 /* 1882 Evaluate maximum storage size needed by the procedure 1883 - temp_indices will contain start index of each constraint stored as follows 1884 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1885 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1886 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1887 */ 1888 total_counts = n_ISForFaces+n_ISForEdges; 1889 total_counts *= max_constraints; 1890 n_vertices = 0; 1891 if (ISForVertices) { 1892 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1893 } 1894 total_counts += n_vertices; 1895 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1896 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1897 total_counts = 0; 1898 max_size_of_constraint = 0; 1899 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1900 if (i<n_ISForEdges) { 1901 used_IS = &ISForEdges[i]; 1902 } else { 1903 used_IS = &ISForFaces[i-n_ISForEdges]; 1904 } 1905 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1906 total_counts += j; 1907 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1908 } 1909 total_counts *= max_constraints; 1910 total_counts += n_vertices; 1911 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1912 /* get local part of global near null space vectors */ 1913 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1914 for (k=0;k<nnsp_size;k++) { 1915 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1916 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1917 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1918 } 1919 1920 /* whether or not to skip lapack calls */ 1921 skip_lapack = PETSC_TRUE; 1922 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1923 1924 /* allocate some auxiliary stuff */ 1925 if (!skip_lapack || pcbddc->use_qr_single) { 1926 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1927 } else { 1928 gidxs = NULL; 1929 permutation = NULL; 1930 temp_indices_to_constraint_work = NULL; 1931 temp_quadrature_constraint_work = NULL; 1932 } 1933 1934 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1935 if (!skip_lapack) { 1936 PetscScalar temp_work; 1937 1938 #if defined(PETSC_MISSING_LAPACK_GESVD) 1939 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1940 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1941 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1942 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1943 #if defined(PETSC_USE_COMPLEX) 1944 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1945 #endif 1946 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1947 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1948 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1949 lwork = -1; 1950 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1951 #if !defined(PETSC_USE_COMPLEX) 1952 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1953 #else 1954 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1955 #endif 1956 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1957 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1958 #else /* on missing GESVD */ 1959 /* SVD */ 1960 PetscInt max_n,min_n; 1961 max_n = max_size_of_constraint; 1962 min_n = max_constraints; 1963 if (max_size_of_constraint < max_constraints) { 1964 min_n = max_size_of_constraint; 1965 max_n = max_constraints; 1966 } 1967 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1968 #if defined(PETSC_USE_COMPLEX) 1969 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1970 #endif 1971 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1972 lwork = -1; 1973 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1974 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1975 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1976 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1977 #if !defined(PETSC_USE_COMPLEX) 1978 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1979 #else 1980 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1981 #endif 1982 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1983 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1984 #endif /* on missing GESVD */ 1985 /* Allocate optimal workspace */ 1986 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1987 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1988 } 1989 /* Now we can loop on constraining sets */ 1990 total_counts = 0; 1991 temp_indices[0] = 0; 1992 /* vertices */ 1993 if (ISForVertices) { 1994 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1995 if (nnsp_has_cnst) { /* consider all vertices */ 1996 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1997 for (i=0;i<n_vertices;i++) { 1998 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1999 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2000 total_counts++; 2001 } 2002 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2003 PetscBool used_vertex; 2004 for (i=0;i<n_vertices;i++) { 2005 used_vertex = PETSC_FALSE; 2006 k = 0; 2007 while (!used_vertex && k<nnsp_size) { 2008 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2009 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2010 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2011 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2012 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2013 total_counts++; 2014 used_vertex = PETSC_TRUE; 2015 } 2016 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2017 k++; 2018 } 2019 } 2020 } 2021 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2022 n_vertices = total_counts; 2023 } 2024 2025 /* edges and faces */ 2026 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2027 if (ncc<n_ISForEdges) { 2028 used_IS = &ISForEdges[ncc]; 2029 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2030 } else { 2031 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2032 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2033 } 2034 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2035 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2036 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2037 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2038 /* change of basis should not be performed on local periodic nodes */ 2039 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2040 if (nnsp_has_cnst) { 2041 PetscScalar quad_value; 2042 temp_constraints++; 2043 if (!pcbddc->use_nnsp_true) { 2044 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2045 } else { 2046 quad_value = 1.0; 2047 } 2048 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2049 for (j=0;j<size_of_constraint;j++) { 2050 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2051 } 2052 /* sort by global ordering if using lapack subroutines (not needed!) */ 2053 if (!skip_lapack || pcbddc->use_qr_single) { 2054 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2055 for (j=0;j<size_of_constraint;j++) { 2056 permutation[j]=j; 2057 } 2058 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2059 for (j=0;j<size_of_constraint;j++) { 2060 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2061 } 2062 for (j=0;j<size_of_constraint;j++) { 2063 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2064 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2065 } 2066 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2067 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2068 } 2069 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2070 total_counts++; 2071 } 2072 for (k=0;k<nnsp_size;k++) { 2073 PetscReal real_value; 2074 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2075 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2076 for (j=0;j<size_of_constraint;j++) { 2077 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2078 } 2079 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2080 /* check if array is null on the connected component */ 2081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2082 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2083 if (real_value > 0.0) { /* keep indices and values */ 2084 /* sort by global ordering if using lapack subroutines */ 2085 if (!skip_lapack || pcbddc->use_qr_single) { 2086 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2087 for (j=0;j<size_of_constraint;j++) { 2088 permutation[j]=j; 2089 } 2090 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2091 for (j=0;j<size_of_constraint;j++) { 2092 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2093 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2094 } 2095 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2096 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2097 } 2098 temp_constraints++; 2099 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2100 total_counts++; 2101 } 2102 } 2103 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2104 valid_constraints = temp_constraints; 2105 if (!pcbddc->use_nnsp_true && temp_constraints) { 2106 if (temp_constraints == 1) { /* just normalize the constraint */ 2107 PetscScalar norm; 2108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2109 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2110 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2111 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2112 } else { /* perform SVD */ 2113 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2114 2115 #if defined(PETSC_MISSING_LAPACK_GESVD) 2116 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2117 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2118 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2119 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2120 from that computed using LAPACKgesvd 2121 -> This is due to a different computation of eigenvectors in LAPACKheev 2122 -> The quality of the POD-computed basis will be the same */ 2123 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2124 /* Store upper triangular part of correlation matrix */ 2125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2126 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2127 for (j=0;j<temp_constraints;j++) { 2128 for (k=0;k<j+1;k++) { 2129 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2130 } 2131 } 2132 /* compute eigenvalues and eigenvectors of correlation matrix */ 2133 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2134 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2135 #if !defined(PETSC_USE_COMPLEX) 2136 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2137 #else 2138 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2139 #endif 2140 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2141 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2142 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2143 j = 0; 2144 while (j < temp_constraints && singular_vals[j] < tol) j++; 2145 total_counts = total_counts-j; 2146 valid_constraints = temp_constraints-j; 2147 /* scale and copy POD basis into used quadrature memory */ 2148 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2151 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2152 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2153 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2154 if (j<temp_constraints) { 2155 PetscInt ii; 2156 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2157 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2158 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2159 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2160 for (k=0;k<temp_constraints-j;k++) { 2161 for (ii=0;ii<size_of_constraint;ii++) { 2162 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2163 } 2164 } 2165 } 2166 #else /* on missing GESVD */ 2167 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2168 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2170 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2171 #if !defined(PETSC_USE_COMPLEX) 2172 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2173 #else 2174 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2175 #endif 2176 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2178 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2179 k = temp_constraints; 2180 if (k > size_of_constraint) k = size_of_constraint; 2181 j = 0; 2182 while (j < k && singular_vals[k-j-1] < tol) j++; 2183 valid_constraints = k-j; 2184 total_counts = total_counts-temp_constraints+valid_constraints; 2185 #endif /* on missing GESVD */ 2186 } 2187 } 2188 /* setting change_of_basis flag is safe now */ 2189 if (boolforchange) { 2190 for (j=0;j<valid_constraints;j++) { 2191 PetscBTSet(change_basis,total_counts-j-1); 2192 } 2193 } 2194 } 2195 /* free workspace */ 2196 if (!skip_lapack || pcbddc->use_qr_single) { 2197 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2198 } 2199 if (!skip_lapack) { 2200 ierr = PetscFree(work);CHKERRQ(ierr); 2201 #if defined(PETSC_USE_COMPLEX) 2202 ierr = PetscFree(rwork);CHKERRQ(ierr); 2203 #endif 2204 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2205 #if defined(PETSC_MISSING_LAPACK_GESVD) 2206 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2207 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2208 #endif 2209 } 2210 for (k=0;k<nnsp_size;k++) { 2211 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2212 } 2213 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2214 /* free index sets of faces, edges and vertices */ 2215 for (i=0;i<n_ISForFaces;i++) { 2216 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2217 } 2218 if (n_ISForFaces) { 2219 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2220 } 2221 for (i=0;i<n_ISForEdges;i++) { 2222 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2223 } 2224 if (n_ISForEdges) { 2225 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2226 } 2227 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2228 } else { 2229 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2230 PetscInt cum = 0; 2231 2232 total_counts = 0; 2233 n_vertices = 0; 2234 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2235 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2236 } 2237 max_constraints = 0; 2238 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2239 total_counts += pcbddc->adaptive_constraints_n[i]; 2240 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2241 } 2242 temp_indices = pcbddc->adaptive_constraints_ptrs; 2243 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2244 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2245 2246 #if 0 2247 printf("Found %d totals\n",total_counts); 2248 for (i=0;i<total_counts;i++) { 2249 printf("const %d, start %d",i,temp_indices[i]); 2250 printf(" end %d:\n",temp_indices[i+1]); 2251 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2252 printf(" idxs %d",temp_indices_to_constraint[j]); 2253 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2254 } 2255 } 2256 for (i=0;i<n_vertices;i++) { 2257 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2258 } 2259 for (i=0;i<sub_schurs->n_subs;i++) { 2260 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2261 } 2262 #endif 2263 2264 max_size_of_constraint = 0; 2265 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2266 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2267 /* Change of basis */ 2268 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2269 if (pcbddc->use_change_of_basis) { 2270 cum = n_vertices; 2271 for (i=0;i<sub_schurs->n_subs;i++) { 2272 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2273 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2274 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2275 } 2276 } 2277 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2278 } 2279 } 2280 } 2281 2282 /* map temp_indices_to_constraint in boundary numbering */ 2283 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2284 if (i != temp_indices[total_counts]) { 2285 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2286 } 2287 2288 /* set quantities in pcbddc data structure and store previous primal size */ 2289 /* n_vertices defines the number of subdomain corners in the primal space */ 2290 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2291 olocal_primal_size = pcbddc->local_primal_size; 2292 pcbddc->local_primal_size = total_counts; 2293 pcbddc->n_vertices = n_vertices; 2294 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2295 2296 /* Create constraint matrix */ 2297 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2298 /* so we need to set it up properly either with or without change of basis */ 2299 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2300 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2301 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2302 /* array to compute a local numbering of constraints : vertices first then constraints */ 2303 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2304 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2305 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2306 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2307 /* auxiliary stuff for basis change */ 2308 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2309 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2310 2311 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2312 total_primal_vertices=0; 2313 for (i=0;i<pcbddc->local_primal_size;i++) { 2314 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2315 if (size_of_constraint == 1) { 2316 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2317 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2318 aux_primal_minloc[total_primal_vertices]=0; 2319 total_primal_vertices++; 2320 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2321 PetscInt min_loc,min_index; 2322 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2323 /* find first untouched local node */ 2324 k = 0; 2325 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2326 min_index = global_indices[k]; 2327 min_loc = k; 2328 /* search the minimum among global nodes already untouched on the cc */ 2329 for (k=1;k<size_of_constraint;k++) { 2330 /* there can be more than one constraint on a single connected component */ 2331 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2332 min_index = global_indices[k]; 2333 min_loc = k; 2334 } 2335 } 2336 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2337 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2338 aux_primal_minloc[total_primal_vertices]=min_loc; 2339 total_primal_vertices++; 2340 } 2341 } 2342 /* determine if a QR strategy is needed for change of basis */ 2343 qr_needed = PETSC_FALSE; 2344 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2345 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2346 if (PetscBTLookup(change_basis,i)) { 2347 if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) { 2348 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2349 j = 0; 2350 for (k=0;k<size_of_constraint;k++) { 2351 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2352 j++; 2353 } 2354 } 2355 /* found more than one primal dof on the cc */ 2356 if (j > 1) { 2357 PetscBTSet(qr_needed_idx,i); 2358 qr_needed = PETSC_TRUE; 2359 } 2360 } else { 2361 PetscBTSet(qr_needed_idx,i); 2362 qr_needed = PETSC_TRUE; 2363 } 2364 } 2365 } 2366 /* free workspace */ 2367 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2368 2369 /* permute indices in order to have a sorted set of vertices */ 2370 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2371 2372 /* nonzero structure of constraint matrix */ 2373 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2374 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2375 j=total_primal_vertices; 2376 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2377 if (!PetscBTLookup(change_basis,i)) { 2378 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2379 j++; 2380 } 2381 } 2382 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2383 ierr = PetscFree(nnz);CHKERRQ(ierr); 2384 /* set values in constraint matrix */ 2385 for (i=0;i<total_primal_vertices;i++) { 2386 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2387 } 2388 total_counts = total_primal_vertices; 2389 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2390 if (!PetscBTLookup(change_basis,i)) { 2391 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2392 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2393 total_counts++; 2394 } 2395 } 2396 /* assembling */ 2397 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2398 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2399 /* 2400 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2401 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2402 */ 2403 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2404 if (pcbddc->use_change_of_basis) { 2405 /* dual and primal dofs on a single cc */ 2406 PetscInt dual_dofs,primal_dofs; 2407 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2408 PetscInt primal_counter; 2409 /* working stuff for GEQRF */ 2410 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2411 PetscBLASInt lqr_work; 2412 /* working stuff for UNGQR */ 2413 PetscScalar *gqr_work,lgqr_work_t; 2414 PetscBLASInt lgqr_work; 2415 /* working stuff for TRTRS */ 2416 PetscScalar *trs_rhs; 2417 PetscBLASInt Blas_NRHS; 2418 /* pointers for values insertion into change of basis matrix */ 2419 PetscInt *start_rows,*start_cols; 2420 PetscScalar *start_vals; 2421 /* working stuff for values insertion */ 2422 PetscBT is_primal; 2423 /* matrix sizes */ 2424 PetscInt global_size,local_size; 2425 /* temporary change of basis */ 2426 Mat localChangeOfBasisMatrix; 2427 /* extra space for debugging */ 2428 PetscScalar *dbg_work; 2429 2430 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2431 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2432 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2433 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2434 /* nonzeros for local mat */ 2435 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2436 for (i=0;i<pcis->n;i++) nnz[i]=1; 2437 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2438 if (PetscBTLookup(change_basis,i)) { 2439 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2440 if (PetscBTLookup(qr_needed_idx,i)) { 2441 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2442 } else { 2443 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2444 /* get local primal index on the cc */ 2445 j = 0; 2446 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2447 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2448 } 2449 } 2450 } 2451 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2452 ierr = PetscFree(nnz);CHKERRQ(ierr); 2453 /* Set initial identity in the matrix */ 2454 for (i=0;i<pcis->n;i++) { 2455 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2456 } 2457 2458 if (pcbddc->dbg_flag) { 2459 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2461 } 2462 2463 2464 /* Now we loop on the constraints which need a change of basis */ 2465 /* 2466 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2467 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2468 2469 Basic blocks of change of basis matrix T computed by 2470 2471 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2472 2473 | 1 0 ... 0 s_1/S | 2474 | 0 1 ... 0 s_2/S | 2475 | ... | 2476 | 0 ... 1 s_{n-1}/S | 2477 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2478 2479 with S = \sum_{i=1}^n s_i^2 2480 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2481 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2482 2483 - QR decomposition of constraints otherwise 2484 */ 2485 if (qr_needed) { 2486 /* space to store Q */ 2487 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2488 /* first we issue queries for optimal work */ 2489 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2490 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2491 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2492 lqr_work = -1; 2493 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2494 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2495 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2496 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2497 lgqr_work = -1; 2498 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2499 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2500 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2501 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2502 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2503 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2504 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2505 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2506 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2507 /* array to store scaling factors for reflectors */ 2508 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2509 /* array to store rhs and solution of triangular solver */ 2510 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2511 /* allocating workspace for check */ 2512 if (pcbddc->dbg_flag) { 2513 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2514 } 2515 } 2516 /* array to store whether a node is primal or not */ 2517 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2518 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2519 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2520 if (i != total_primal_vertices) { 2521 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2522 } 2523 for (i=0;i<total_primal_vertices;i++) { 2524 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2525 } 2526 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2527 2528 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2529 /* -> using implicit ordering contained in temp_indices data */ 2530 total_counts = pcbddc->n_vertices; 2531 primal_counter = total_counts; 2532 while (total_counts<pcbddc->local_primal_size) { 2533 primal_dofs = 1; 2534 if (PetscBTLookup(change_basis,total_counts)) { 2535 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2536 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2537 primal_dofs++; 2538 } 2539 /* get constraint info */ 2540 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2541 dual_dofs = size_of_constraint-primal_dofs; 2542 2543 if (pcbddc->dbg_flag) { 2544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2545 } 2546 2547 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2548 2549 /* copy quadrature constraints for change of basis check */ 2550 if (pcbddc->dbg_flag) { 2551 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2552 } 2553 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2554 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2555 2556 /* compute QR decomposition of constraints */ 2557 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2558 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2560 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2561 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2562 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2563 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2564 2565 /* explictly compute R^-T */ 2566 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2567 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2568 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2572 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2573 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2574 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2575 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2576 2577 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2578 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2582 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2583 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2584 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2585 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2586 2587 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2588 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2589 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2592 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2594 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2595 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2596 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2597 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2598 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2599 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2600 2601 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2602 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2603 /* insert cols for primal dofs */ 2604 for (j=0;j<primal_dofs;j++) { 2605 start_vals = &qr_basis[j*size_of_constraint]; 2606 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2607 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2608 } 2609 /* insert cols for dual dofs */ 2610 for (j=0,k=0;j<dual_dofs;k++) { 2611 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2612 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2613 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2614 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2615 j++; 2616 } 2617 } 2618 2619 /* check change of basis */ 2620 if (pcbddc->dbg_flag) { 2621 PetscInt ii,jj; 2622 PetscBool valid_qr=PETSC_TRUE; 2623 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2624 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2625 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2626 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2628 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2629 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2630 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2631 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2632 for (jj=0;jj<size_of_constraint;jj++) { 2633 for (ii=0;ii<primal_dofs;ii++) { 2634 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2635 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2636 } 2637 } 2638 if (!valid_qr) { 2639 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2640 for (jj=0;jj<size_of_constraint;jj++) { 2641 for (ii=0;ii<primal_dofs;ii++) { 2642 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2643 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2644 } 2645 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2646 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2647 } 2648 } 2649 } 2650 } else { 2651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2652 } 2653 } 2654 } else { /* simple transformation block */ 2655 PetscInt row,col; 2656 PetscScalar val,norm; 2657 2658 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2659 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2660 for (j=0;j<size_of_constraint;j++) { 2661 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2662 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2663 if (!PetscBTLookup(is_primal,row_B)) { 2664 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2665 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2666 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2667 } else { 2668 for (k=0;k<size_of_constraint;k++) { 2669 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2670 if (row != col) { 2671 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2672 } else { 2673 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2674 } 2675 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2676 } 2677 } 2678 } 2679 if (pcbddc->dbg_flag) { 2680 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2681 } 2682 } 2683 /* increment primal counter */ 2684 primal_counter += primal_dofs; 2685 } else { 2686 if (pcbddc->dbg_flag) { 2687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2688 } 2689 } 2690 /* increment constraint counter total_counts */ 2691 total_counts += primal_dofs; 2692 } 2693 2694 /* free workspace */ 2695 if (qr_needed) { 2696 if (pcbddc->dbg_flag) { 2697 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2698 } 2699 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2700 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2701 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2702 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2703 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2704 } 2705 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2706 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2708 2709 /* assembling of global change of variable */ 2710 { 2711 Mat tmat; 2712 PetscInt bs; 2713 2714 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2715 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2716 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2717 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2718 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2719 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2720 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2721 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2722 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2723 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2724 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2725 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2726 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2727 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2728 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2729 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2730 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2731 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2732 } 2733 /* check */ 2734 if (pcbddc->dbg_flag) { 2735 PetscReal error; 2736 Vec x,x_change; 2737 2738 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2739 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2740 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2741 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2742 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2743 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2744 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2745 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2746 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2747 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2748 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2749 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2750 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2751 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2752 ierr = VecDestroy(&x);CHKERRQ(ierr); 2753 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2754 } 2755 2756 /* adapt sub_schurs computed (if any) */ 2757 if (pcbddc->use_deluxe_scaling) { 2758 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2759 if (sub_schurs->S_Ej_all) { 2760 Mat S_new,tmat; 2761 IS is_all_N; 2762 2763 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2764 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2765 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2766 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2767 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2768 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2769 sub_schurs->S_Ej_all = S_new; 2770 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2771 if (sub_schurs->sum_S_Ej_all) { 2772 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2773 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2774 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2775 sub_schurs->sum_S_Ej_all = S_new; 2776 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2777 } 2778 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2779 } 2780 } 2781 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2782 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2783 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2784 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2785 } 2786 2787 /* set up change of basis context */ 2788 if (pcbddc->ChangeOfBasisMatrix) { 2789 PCBDDCChange_ctx change_ctx; 2790 2791 if (!pcbddc->new_global_mat) { 2792 PetscInt global_size,local_size; 2793 2794 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2795 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2796 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2797 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2798 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2799 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2800 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2801 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2802 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2803 } else { 2804 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2805 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2806 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2807 } 2808 if (!pcbddc->user_ChangeOfBasisMatrix) { 2809 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2810 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2811 } else { 2812 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2813 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2814 } 2815 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2816 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2817 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2818 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 } 2820 2821 /* get indices in local ordering for vertices and constraints */ 2822 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2823 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2824 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2825 } 2826 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2827 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2828 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2829 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2830 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2831 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2832 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2833 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2834 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2835 /* set quantities in PCBDDC data struct */ 2836 pcbddc->n_actual_vertices = i; 2837 /* check if a new primal space has been introduced */ 2838 pcbddc->new_primal_space_local = PETSC_TRUE; 2839 if (olocal_primal_size == pcbddc->local_primal_size) { 2840 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2841 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2842 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2843 } 2844 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2845 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2846 2847 /* flush dbg viewer */ 2848 if (pcbddc->dbg_flag) { 2849 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2850 } 2851 2852 /* free workspace */ 2853 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2854 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2855 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2856 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2857 if (!pcbddc->adaptive_selection) { 2858 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2859 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2860 } else { 2861 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2862 pcbddc->adaptive_constraints_ptrs, 2863 pcbddc->adaptive_constraints_idxs, 2864 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2865 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2866 } 2867 PetscFunctionReturn(0); 2868 } 2869 2870 #undef __FUNCT__ 2871 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2872 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2873 { 2874 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2875 PC_IS *pcis = (PC_IS*)pc->data; 2876 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2877 PetscInt ierr,i,vertex_size; 2878 PetscViewer viewer=pcbddc->dbg_viewer; 2879 2880 PetscFunctionBegin; 2881 /* Reset previously computed graph */ 2882 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2883 /* Init local Graph struct */ 2884 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2885 2886 /* Check validity of the csr graph passed in by the user */ 2887 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2888 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2889 } 2890 2891 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2892 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2893 PetscInt *xadj,*adjncy; 2894 PetscInt nvtxs; 2895 PetscBool flg_row=PETSC_FALSE; 2896 2897 if (pcbddc->use_local_adj) { 2898 2899 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2900 if (flg_row) { 2901 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2902 pcbddc->computed_rowadj = PETSC_TRUE; 2903 } 2904 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2905 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2906 IS is_dummy; 2907 ISLocalToGlobalMapping l2gmap_dummy; 2908 PetscInt j,sum; 2909 PetscInt *cxadj,*cadjncy; 2910 const PetscInt *idxs; 2911 PCBDDCGraph graph; 2912 PetscBT is_on_boundary; 2913 2914 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2915 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2916 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2917 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2918 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2919 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2920 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2921 if (flg_row) { 2922 graph->xadj = xadj; 2923 graph->adjncy = adjncy; 2924 } 2925 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2926 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2927 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2928 2929 if (pcbddc->dbg_flag) { 2930 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2931 for (i=0;i<graph->ncc;i++) { 2932 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2933 } 2934 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2935 } 2936 2937 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2938 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2939 for (i=0;i<pcis->n_B;i++) { 2940 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2941 } 2942 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2943 2944 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2945 sum = 0; 2946 for (i=0;i<graph->ncc;i++) { 2947 PetscInt sizecc = 0; 2948 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2949 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2950 sizecc++; 2951 } 2952 } 2953 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2954 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2955 cxadj[graph->queue[j]] = sizecc; 2956 } 2957 } 2958 sum += sizecc*sizecc; 2959 } 2960 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2961 sum = 0; 2962 for (i=0;i<pcis->n;i++) { 2963 PetscInt temp = cxadj[i]; 2964 cxadj[i] = sum; 2965 sum += temp; 2966 } 2967 cxadj[pcis->n] = sum; 2968 for (i=0;i<graph->ncc;i++) { 2969 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2970 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2971 PetscInt k,sizecc = 0; 2972 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2973 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2974 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2975 sizecc++; 2976 } 2977 } 2978 } 2979 } 2980 } 2981 if (pcis->n) { 2982 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2983 } else { 2984 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2985 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2986 } 2987 graph->xadj = 0; 2988 graph->adjncy = 0; 2989 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2990 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2991 } 2992 } 2993 2994 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2995 vertex_size = 1; 2996 if (pcbddc->user_provided_isfordofs) { 2997 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2998 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2999 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3000 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3001 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3002 } 3003 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3004 pcbddc->n_ISForDofs = 0; 3005 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3006 } 3007 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3008 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3009 } else { 3010 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3011 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3012 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3013 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3014 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3015 } 3016 } 3017 } 3018 3019 /* Setup of Graph */ 3020 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3021 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3022 } 3023 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3024 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3025 } 3026 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3027 3028 /* Graph's connected components analysis */ 3029 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3030 3031 /* print some info to stdout */ 3032 if (pcbddc->dbg_flag) { 3033 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3034 } 3035 3036 /* mark topography has done */ 3037 pcbddc->recompute_topography = PETSC_FALSE; 3038 PetscFunctionReturn(0); 3039 } 3040 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3043 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3044 { 3045 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3046 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3047 PetscErrorCode ierr; 3048 3049 PetscFunctionBegin; 3050 n = 0; 3051 vertices = 0; 3052 if (pcbddc->ConstraintMatrix) { 3053 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3054 for (i=0;i<local_primal_size;i++) { 3055 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3056 if (size_of_constraint == 1) n++; 3057 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3058 } 3059 if (vertices_idx) { 3060 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3061 n = 0; 3062 for (i=0;i<local_primal_size;i++) { 3063 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3064 if (size_of_constraint == 1) { 3065 vertices[n++]=row_cmat_indices[0]; 3066 } 3067 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3068 } 3069 } 3070 } 3071 *n_vertices = n; 3072 if (vertices_idx) *vertices_idx = vertices; 3073 PetscFunctionReturn(0); 3074 } 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3078 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3079 { 3080 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3081 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3082 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3083 PetscBT touched; 3084 PetscErrorCode ierr; 3085 3086 /* This function assumes that the number of local constraints per connected component 3087 is not greater than the number of nodes defined for the connected component 3088 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3089 PetscFunctionBegin; 3090 n = 0; 3091 constraints_index = 0; 3092 if (pcbddc->ConstraintMatrix) { 3093 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3094 max_size_of_constraint = 0; 3095 for (i=0;i<local_primal_size;i++) { 3096 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3097 if (size_of_constraint > 1) { 3098 n++; 3099 } 3100 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3101 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3102 } 3103 if (constraints_idx) { 3104 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3105 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3106 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3107 n = 0; 3108 for (i=0;i<local_primal_size;i++) { 3109 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3110 if (size_of_constraint > 1) { 3111 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3112 /* find first untouched local node */ 3113 j = 0; 3114 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3115 min_index = row_cmat_global_indices[j]; 3116 min_loc = j; 3117 /* search the minimum among nodes not yet touched on the connected component 3118 since there can be more than one constraint on a single cc */ 3119 for (j=1;j<size_of_constraint;j++) { 3120 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3121 min_index = row_cmat_global_indices[j]; 3122 min_loc = j; 3123 } 3124 } 3125 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3126 constraints_index[n++] = row_cmat_indices[min_loc]; 3127 } 3128 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3129 } 3130 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3131 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3132 } 3133 } 3134 *n_constraints = n; 3135 if (constraints_idx) *constraints_idx = constraints_index; 3136 PetscFunctionReturn(0); 3137 } 3138 3139 #undef __FUNCT__ 3140 #define __FUNCT__ "PCBDDCSubsetNumbering" 3141 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3142 { 3143 Vec local_vec,global_vec; 3144 IS seqis,paris; 3145 VecScatter scatter_ctx; 3146 PetscScalar *array; 3147 PetscInt *temp_global_dofs; 3148 PetscScalar globalsum; 3149 PetscInt i,j,s; 3150 PetscInt nlocals,first_index,old_index,max_local,max_global; 3151 PetscMPIInt rank_prec_comm,size_prec_comm; 3152 PetscInt *dof_sizes,*dof_displs; 3153 PetscBool first_found; 3154 PetscErrorCode ierr; 3155 3156 PetscFunctionBegin; 3157 /* mpi buffers */ 3158 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3159 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3160 j = ( !rank_prec_comm ? size_prec_comm : 0); 3161 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3162 /* get maximum size of subset */ 3163 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3164 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3165 max_local = 0; 3166 for (i=0;i<n_local_dofs;i++) { 3167 if (max_local < temp_global_dofs[i] ) { 3168 max_local = temp_global_dofs[i]; 3169 } 3170 } 3171 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3172 max_global++; 3173 max_local = 0; 3174 for (i=0;i<n_local_dofs;i++) { 3175 if (max_local < local_dofs[i] ) { 3176 max_local = local_dofs[i]; 3177 } 3178 } 3179 max_local++; 3180 /* allocate workspace */ 3181 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3182 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3183 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3184 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3185 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3186 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3187 /* create scatter */ 3188 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3189 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3190 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3191 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3192 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3193 /* init array */ 3194 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3195 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3196 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3197 if (local_dofs_mult) { 3198 for (i=0;i<n_local_dofs;i++) { 3199 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3200 } 3201 } else { 3202 for (i=0;i<n_local_dofs;i++) { 3203 array[local_dofs[i]]=1.0; 3204 } 3205 } 3206 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3207 /* scatter into global vec and get total number of global dofs */ 3208 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3209 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3210 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3211 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3212 /* Fill global_vec with cumulative function for global numbering */ 3213 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3214 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3215 nlocals = 0; 3216 first_index = -1; 3217 first_found = PETSC_FALSE; 3218 for (i=0;i<s;i++) { 3219 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3220 first_found = PETSC_TRUE; 3221 first_index = i; 3222 } 3223 nlocals += (PetscInt)PetscRealPart(array[i]); 3224 } 3225 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3226 if (!rank_prec_comm) { 3227 dof_displs[0]=0; 3228 for (i=1;i<size_prec_comm;i++) { 3229 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3230 } 3231 } 3232 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3233 if (first_found) { 3234 array[first_index] += (PetscScalar)nlocals; 3235 old_index = first_index; 3236 for (i=first_index+1;i<s;i++) { 3237 if (PetscRealPart(array[i]) > 0.1) { 3238 array[i] += array[old_index]; 3239 old_index = i; 3240 } 3241 } 3242 } 3243 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3244 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3245 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3246 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3247 /* get global ordering of local dofs */ 3248 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3249 if (local_dofs_mult) { 3250 for (i=0;i<n_local_dofs;i++) { 3251 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3252 } 3253 } else { 3254 for (i=0;i<n_local_dofs;i++) { 3255 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3256 } 3257 } 3258 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3259 /* free workspace */ 3260 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3261 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3262 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3263 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3264 /* return pointer to global ordering of local dofs */ 3265 *global_numbering_subset = temp_global_dofs; 3266 PetscFunctionReturn(0); 3267 } 3268 3269 #undef __FUNCT__ 3270 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3271 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3272 { 3273 PetscInt i,j; 3274 PetscScalar *alphas; 3275 PetscErrorCode ierr; 3276 3277 PetscFunctionBegin; 3278 /* this implements stabilized Gram-Schmidt */ 3279 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3280 for (i=0;i<n;i++) { 3281 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3282 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3283 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3284 } 3285 ierr = PetscFree(alphas);CHKERRQ(ierr); 3286 PetscFunctionReturn(0); 3287 } 3288 3289 #undef __FUNCT__ 3290 #define __FUNCT__ "MatISGetSubassemblingPattern" 3291 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3292 { 3293 Mat subdomain_adj; 3294 IS new_ranks,ranks_send_to; 3295 MatPartitioning partitioner; 3296 Mat_IS *matis; 3297 PetscInt n_neighs,*neighs,*n_shared,**shared; 3298 PetscInt prank; 3299 PetscMPIInt size,rank,color; 3300 PetscInt *xadj,*adjncy,*oldranks; 3301 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3302 PetscInt i,local_size,threshold=0; 3303 PetscErrorCode ierr; 3304 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3305 PetscSubcomm subcomm; 3306 3307 PetscFunctionBegin; 3308 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3309 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3310 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3311 3312 /* Get info on mapping */ 3313 matis = (Mat_IS*)(mat->data); 3314 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3315 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3316 3317 /* build local CSR graph of subdomains' connectivity */ 3318 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3319 xadj[0] = 0; 3320 xadj[1] = PetscMax(n_neighs-1,0); 3321 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3322 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3323 3324 if (threshold) { 3325 PetscInt xadj_count = 0; 3326 for (i=1;i<n_neighs;i++) { 3327 if (n_shared[i] > threshold) { 3328 adjncy[xadj_count] = neighs[i]; 3329 adjncy_wgt[xadj_count] = n_shared[i]; 3330 xadj_count++; 3331 } 3332 } 3333 xadj[1] = xadj_count; 3334 } else { 3335 if (xadj[1]) { 3336 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3337 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3338 } 3339 } 3340 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3341 if (use_square) { 3342 for (i=0;i<xadj[1];i++) { 3343 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3344 } 3345 } 3346 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3347 3348 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3349 3350 /* 3351 Restrict work on active processes only. 3352 */ 3353 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3354 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3355 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3356 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3357 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3358 if (color) { 3359 ierr = PetscFree(xadj);CHKERRQ(ierr); 3360 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3361 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3362 } else { 3363 PetscInt coarsening_ratio; 3364 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3365 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3366 prank = rank; 3367 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3368 /* 3369 for (i=0;i<size;i++) { 3370 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3371 } 3372 */ 3373 for (i=0;i<xadj[1];i++) { 3374 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3375 } 3376 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3377 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3378 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3379 3380 /* Partition */ 3381 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3382 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3383 if (use_vwgt) { 3384 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3385 v_wgt[0] = local_size; 3386 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3387 } 3388 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3389 coarsening_ratio = size/n_subdomains; 3390 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3391 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3392 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3393 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3394 3395 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3396 if (contiguous) { 3397 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3398 } else { 3399 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3400 } 3401 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3402 /* clean up */ 3403 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3404 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3405 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3406 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3407 } 3408 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3409 3410 /* assemble parallel IS for sends */ 3411 i = 1; 3412 if (color) i=0; 3413 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3414 3415 /* get back IS */ 3416 *is_sends = ranks_send_to; 3417 PetscFunctionReturn(0); 3418 } 3419 3420 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3421 3422 #undef __FUNCT__ 3423 #define __FUNCT__ "MatISSubassemble" 3424 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3425 { 3426 Mat local_mat; 3427 Mat_IS *matis; 3428 IS is_sends_internal; 3429 PetscInt rows,cols,new_local_rows; 3430 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3431 PetscBool ismatis,isdense,newisdense,destroy_mat; 3432 ISLocalToGlobalMapping l2gmap; 3433 PetscInt* l2gmap_indices; 3434 const PetscInt* is_indices; 3435 MatType new_local_type; 3436 /* buffers */ 3437 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3438 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3439 PetscInt *recv_buffer_idxs_local; 3440 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3441 /* MPI */ 3442 MPI_Comm comm,comm_n; 3443 PetscSubcomm subcomm; 3444 PetscMPIInt n_sends,n_recvs,commsize; 3445 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3446 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3447 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3448 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3449 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3450 PetscErrorCode ierr; 3451 3452 PetscFunctionBegin; 3453 /* TODO: add missing checks */ 3454 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3455 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3456 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3457 PetscValidLogicalCollectiveInt(mat,nis,7); 3458 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3459 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3460 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3461 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3462 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3463 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3464 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3465 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3466 PetscInt mrows,mcols,mnrows,mncols; 3467 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3468 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3469 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3470 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3471 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3472 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3473 } 3474 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3475 PetscValidLogicalCollectiveInt(mat,bs,0); 3476 /* prepare IS for sending if not provided */ 3477 if (!is_sends) { 3478 PetscBool pcontig = PETSC_TRUE; 3479 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3480 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3481 } else { 3482 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3483 is_sends_internal = is_sends; 3484 } 3485 3486 /* get pointer of MATIS data */ 3487 matis = (Mat_IS*)mat->data; 3488 3489 /* get comm */ 3490 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3491 3492 /* compute number of sends */ 3493 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3494 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3495 3496 /* compute number of receives */ 3497 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3498 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3499 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3500 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3501 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3502 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3503 ierr = PetscFree(iflags);CHKERRQ(ierr); 3504 3505 /* restrict comm if requested */ 3506 subcomm = 0; 3507 destroy_mat = PETSC_FALSE; 3508 if (restrict_comm) { 3509 PetscMPIInt color,subcommsize; 3510 3511 color = 0; 3512 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3513 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3514 subcommsize = commsize - subcommsize; 3515 /* check if reuse has been requested */ 3516 if (reuse == MAT_REUSE_MATRIX) { 3517 if (*mat_n) { 3518 PetscMPIInt subcommsize2; 3519 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3520 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3521 comm_n = PetscObjectComm((PetscObject)*mat_n); 3522 } else { 3523 comm_n = PETSC_COMM_SELF; 3524 } 3525 } else { /* MAT_INITIAL_MATRIX */ 3526 PetscMPIInt rank; 3527 3528 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3529 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3530 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3531 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3532 comm_n = PetscSubcommChild(subcomm); 3533 } 3534 /* flag to destroy *mat_n if not significative */ 3535 if (color) destroy_mat = PETSC_TRUE; 3536 } else { 3537 comm_n = comm; 3538 } 3539 3540 /* prepare send/receive buffers */ 3541 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3542 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3543 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3544 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3545 if (nis) { 3546 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3547 } 3548 3549 /* Get data from local matrices */ 3550 if (!isdense) { 3551 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3552 /* TODO: See below some guidelines on how to prepare the local buffers */ 3553 /* 3554 send_buffer_vals should contain the raw values of the local matrix 3555 send_buffer_idxs should contain: 3556 - MatType_PRIVATE type 3557 - PetscInt size_of_l2gmap 3558 - PetscInt global_row_indices[size_of_l2gmap] 3559 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3560 */ 3561 } else { 3562 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3563 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3564 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3565 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3566 send_buffer_idxs[1] = i; 3567 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3568 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3569 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3570 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3571 for (i=0;i<n_sends;i++) { 3572 ilengths_vals[is_indices[i]] = len*len; 3573 ilengths_idxs[is_indices[i]] = len+2; 3574 } 3575 } 3576 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3577 /* additional is (if any) */ 3578 if (nis) { 3579 PetscMPIInt psum; 3580 PetscInt j; 3581 for (j=0,psum=0;j<nis;j++) { 3582 PetscInt plen; 3583 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3584 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3585 psum += len+1; /* indices + lenght */ 3586 } 3587 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3588 for (j=0,psum=0;j<nis;j++) { 3589 PetscInt plen; 3590 const PetscInt *is_array_idxs; 3591 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3592 send_buffer_idxs_is[psum] = plen; 3593 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3594 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3595 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3596 psum += plen+1; /* indices + lenght */ 3597 } 3598 for (i=0;i<n_sends;i++) { 3599 ilengths_idxs_is[is_indices[i]] = psum; 3600 } 3601 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3602 } 3603 3604 buf_size_idxs = 0; 3605 buf_size_vals = 0; 3606 buf_size_idxs_is = 0; 3607 for (i=0;i<n_recvs;i++) { 3608 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3609 buf_size_vals += (PetscInt)olengths_vals[i]; 3610 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3611 } 3612 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3613 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3614 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3615 3616 /* get new tags for clean communications */ 3617 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3618 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3619 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3620 3621 /* allocate for requests */ 3622 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3623 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3624 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3625 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3626 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3627 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3628 3629 /* communications */ 3630 ptr_idxs = recv_buffer_idxs; 3631 ptr_vals = recv_buffer_vals; 3632 ptr_idxs_is = recv_buffer_idxs_is; 3633 for (i=0;i<n_recvs;i++) { 3634 source_dest = onodes[i]; 3635 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3636 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3637 ptr_idxs += olengths_idxs[i]; 3638 ptr_vals += olengths_vals[i]; 3639 if (nis) { 3640 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3641 ptr_idxs_is += olengths_idxs_is[i]; 3642 } 3643 } 3644 for (i=0;i<n_sends;i++) { 3645 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3646 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3647 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3648 if (nis) { 3649 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3650 } 3651 } 3652 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3653 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3654 3655 /* assemble new l2g map */ 3656 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3657 ptr_idxs = recv_buffer_idxs; 3658 new_local_rows = 0; 3659 for (i=0;i<n_recvs;i++) { 3660 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3661 ptr_idxs += olengths_idxs[i]; 3662 } 3663 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3664 ptr_idxs = recv_buffer_idxs; 3665 new_local_rows = 0; 3666 for (i=0;i<n_recvs;i++) { 3667 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3668 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3669 ptr_idxs += olengths_idxs[i]; 3670 } 3671 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3672 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3673 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3674 3675 /* infer new local matrix type from received local matrices type */ 3676 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3677 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3678 if (n_recvs) { 3679 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3680 ptr_idxs = recv_buffer_idxs; 3681 for (i=0;i<n_recvs;i++) { 3682 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3683 new_local_type_private = MATAIJ_PRIVATE; 3684 break; 3685 } 3686 ptr_idxs += olengths_idxs[i]; 3687 } 3688 switch (new_local_type_private) { 3689 case MATDENSE_PRIVATE: 3690 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3691 new_local_type = MATSEQAIJ; 3692 bs = 1; 3693 } else { /* if I receive only 1 dense matrix */ 3694 new_local_type = MATSEQDENSE; 3695 bs = 1; 3696 } 3697 break; 3698 case MATAIJ_PRIVATE: 3699 new_local_type = MATSEQAIJ; 3700 bs = 1; 3701 break; 3702 case MATBAIJ_PRIVATE: 3703 new_local_type = MATSEQBAIJ; 3704 break; 3705 case MATSBAIJ_PRIVATE: 3706 new_local_type = MATSEQSBAIJ; 3707 break; 3708 default: 3709 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3710 break; 3711 } 3712 } else { /* by default, new_local_type is seqdense */ 3713 new_local_type = MATSEQDENSE; 3714 bs = 1; 3715 } 3716 3717 /* create MATIS object if needed */ 3718 if (reuse == MAT_INITIAL_MATRIX) { 3719 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3720 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3721 } else { 3722 /* it also destroys the local matrices */ 3723 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3724 } 3725 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3726 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3727 3728 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3729 3730 /* Global to local map of received indices */ 3731 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3732 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3733 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3734 3735 /* restore attributes -> type of incoming data and its size */ 3736 buf_size_idxs = 0; 3737 for (i=0;i<n_recvs;i++) { 3738 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3739 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3740 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3741 } 3742 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3743 3744 /* set preallocation */ 3745 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3746 if (!newisdense) { 3747 PetscInt *new_local_nnz=0; 3748 3749 ptr_vals = recv_buffer_vals; 3750 ptr_idxs = recv_buffer_idxs_local; 3751 if (n_recvs) { 3752 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3753 } 3754 for (i=0;i<n_recvs;i++) { 3755 PetscInt j; 3756 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3757 for (j=0;j<*(ptr_idxs+1);j++) { 3758 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3759 } 3760 } else { 3761 /* TODO */ 3762 } 3763 ptr_idxs += olengths_idxs[i]; 3764 } 3765 if (new_local_nnz) { 3766 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3767 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3768 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3769 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3770 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3771 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3772 } else { 3773 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3774 } 3775 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3776 } else { 3777 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3778 } 3779 3780 /* set values */ 3781 ptr_vals = recv_buffer_vals; 3782 ptr_idxs = recv_buffer_idxs_local; 3783 for (i=0;i<n_recvs;i++) { 3784 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3785 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3786 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3787 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3788 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3789 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3790 } else { 3791 /* TODO */ 3792 } 3793 ptr_idxs += olengths_idxs[i]; 3794 ptr_vals += olengths_vals[i]; 3795 } 3796 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3797 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3798 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3799 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3800 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3801 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3802 3803 #if 0 3804 if (!restrict_comm) { /* check */ 3805 Vec lvec,rvec; 3806 PetscReal infty_error; 3807 3808 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3809 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3810 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3811 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3812 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3813 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3814 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3815 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3816 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3817 } 3818 #endif 3819 3820 /* assemble new additional is (if any) */ 3821 if (nis) { 3822 PetscInt **temp_idxs,*count_is,j,psum; 3823 3824 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3825 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3826 ptr_idxs = recv_buffer_idxs_is; 3827 psum = 0; 3828 for (i=0;i<n_recvs;i++) { 3829 for (j=0;j<nis;j++) { 3830 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3831 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3832 psum += plen; 3833 ptr_idxs += plen+1; /* shift pointer to received data */ 3834 } 3835 } 3836 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3837 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3838 for (i=1;i<nis;i++) { 3839 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3840 } 3841 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3842 ptr_idxs = recv_buffer_idxs_is; 3843 for (i=0;i<n_recvs;i++) { 3844 for (j=0;j<nis;j++) { 3845 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3846 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3847 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3848 ptr_idxs += plen+1; /* shift pointer to received data */ 3849 } 3850 } 3851 for (i=0;i<nis;i++) { 3852 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3853 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3854 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3855 } 3856 ierr = PetscFree(count_is);CHKERRQ(ierr); 3857 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3858 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3859 } 3860 /* free workspace */ 3861 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3862 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3863 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3864 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3865 if (isdense) { 3866 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3867 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3868 } else { 3869 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3870 } 3871 if (nis) { 3872 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3873 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3874 } 3875 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3876 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3877 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3878 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3879 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3880 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3881 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3882 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3883 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3884 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3885 ierr = PetscFree(onodes);CHKERRQ(ierr); 3886 if (nis) { 3887 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3888 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3889 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3890 } 3891 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3892 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3893 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3894 for (i=0;i<nis;i++) { 3895 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3896 } 3897 } 3898 PetscFunctionReturn(0); 3899 } 3900 3901 /* temporary hack into ksp private data structure */ 3902 #include <petsc/private/kspimpl.h> 3903 3904 #undef __FUNCT__ 3905 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3906 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3907 { 3908 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3909 PC_IS *pcis = (PC_IS*)pc->data; 3910 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3911 MatNullSpace CoarseNullSpace=NULL; 3912 ISLocalToGlobalMapping coarse_islg; 3913 IS coarse_is,*isarray; 3914 PetscInt i,im_active=-1,active_procs=-1; 3915 PetscInt nis,nisdofs,nisneu; 3916 PC pc_temp; 3917 PCType coarse_pc_type; 3918 KSPType coarse_ksp_type; 3919 PetscBool multilevel_requested,multilevel_allowed; 3920 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3921 Mat t_coarse_mat_is; 3922 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3923 PetscMPIInt all_procs; 3924 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3925 PetscBool compute_vecs = PETSC_FALSE; 3926 PetscScalar *array; 3927 PetscErrorCode ierr; 3928 3929 PetscFunctionBegin; 3930 /* Assign global numbering to coarse dofs */ 3931 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3932 PetscInt ocoarse_size; 3933 compute_vecs = PETSC_TRUE; 3934 ocoarse_size = pcbddc->coarse_size; 3935 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3936 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3937 /* see if we can avoid some work */ 3938 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3939 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3940 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3941 coarse_reuse = PETSC_FALSE; 3942 } else { /* we can safely reuse already computed coarse matrix */ 3943 coarse_reuse = PETSC_TRUE; 3944 } 3945 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3946 coarse_reuse = PETSC_FALSE; 3947 } 3948 /* reset any subassembling information */ 3949 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3950 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3951 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3952 coarse_reuse = PETSC_TRUE; 3953 } 3954 3955 /* count "active" (i.e. with positive local size) and "void" processes */ 3956 im_active = !!(pcis->n); 3957 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3958 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3959 void_procs = all_procs-active_procs; 3960 csin_type_simple = PETSC_TRUE; 3961 redist = PETSC_FALSE; 3962 if (pcbddc->current_level && void_procs) { 3963 csin_ml = PETSC_TRUE; 3964 ncoarse_ml = void_procs; 3965 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3966 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3967 csin_ds = PETSC_TRUE; 3968 ncoarse_ds = pcbddc->redistribute_coarse; 3969 redist = PETSC_TRUE; 3970 } else { 3971 csin_ds = PETSC_TRUE; 3972 ncoarse_ds = active_procs; 3973 redist = PETSC_TRUE; 3974 } 3975 } else { 3976 csin_ml = PETSC_FALSE; 3977 ncoarse_ml = all_procs; 3978 if (void_procs) { 3979 csin_ds = PETSC_TRUE; 3980 ncoarse_ds = void_procs; 3981 csin_type_simple = PETSC_FALSE; 3982 } else { 3983 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3984 csin_ds = PETSC_TRUE; 3985 ncoarse_ds = pcbddc->redistribute_coarse; 3986 redist = PETSC_TRUE; 3987 } else { 3988 csin_ds = PETSC_FALSE; 3989 ncoarse_ds = all_procs; 3990 } 3991 } 3992 } 3993 3994 /* 3995 test if we can go multilevel: three conditions must be satisfied: 3996 - we have not exceeded the number of levels requested 3997 - we can actually subassemble the active processes 3998 - we can find a suitable number of MPI processes where we can place the subassembled problem 3999 */ 4000 multilevel_allowed = PETSC_FALSE; 4001 multilevel_requested = PETSC_FALSE; 4002 if (pcbddc->current_level < pcbddc->max_levels) { 4003 multilevel_requested = PETSC_TRUE; 4004 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4005 multilevel_allowed = PETSC_FALSE; 4006 } else { 4007 multilevel_allowed = PETSC_TRUE; 4008 } 4009 } 4010 /* determine number of process partecipating to coarse solver */ 4011 if (multilevel_allowed) { 4012 ncoarse = ncoarse_ml; 4013 csin = csin_ml; 4014 redist = PETSC_FALSE; 4015 } else { 4016 ncoarse = ncoarse_ds; 4017 csin = csin_ds; 4018 } 4019 4020 /* creates temporary l2gmap and IS for coarse indexes */ 4021 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4022 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4023 4024 /* creates temporary MATIS object for coarse matrix */ 4025 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4026 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4027 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4028 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4029 #if 0 4030 { 4031 PetscViewer viewer; 4032 char filename[256]; 4033 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4034 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4035 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4036 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4037 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4038 } 4039 #endif 4040 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4041 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4042 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4043 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4044 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4045 4046 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4047 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4048 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4049 const PetscInt *idxs; 4050 ISLocalToGlobalMapping tmap; 4051 4052 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4053 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4054 /* allocate space for temporary storage */ 4055 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4056 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4057 /* allocate for IS array */ 4058 nisdofs = pcbddc->n_ISForDofsLocal; 4059 nisneu = !!pcbddc->NeumannBoundariesLocal; 4060 nis = nisdofs + nisneu; 4061 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4062 /* dofs splitting */ 4063 for (i=0;i<nisdofs;i++) { 4064 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4065 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4066 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4067 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4068 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4069 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4070 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4071 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4072 } 4073 /* neumann boundaries */ 4074 if (pcbddc->NeumannBoundariesLocal) { 4075 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4076 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4077 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4078 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4079 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4080 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4081 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4082 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4083 } 4084 /* free memory */ 4085 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4086 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4087 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4088 } else { 4089 nis = 0; 4090 nisdofs = 0; 4091 nisneu = 0; 4092 isarray = NULL; 4093 } 4094 /* destroy no longer needed map */ 4095 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4096 4097 /* restrict on coarse candidates (if needed) */ 4098 coarse_mat_is = NULL; 4099 if (csin) { 4100 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4101 if (redist) { 4102 PetscMPIInt rank; 4103 PetscInt spc,n_spc_p1,dest[1],destsize; 4104 4105 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4106 spc = active_procs/ncoarse; 4107 n_spc_p1 = active_procs%ncoarse; 4108 if (im_active) { 4109 destsize = 1; 4110 if (rank > n_spc_p1*(spc+1)-1) { 4111 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4112 } else { 4113 dest[0] = rank/(spc+1); 4114 } 4115 } else { 4116 destsize = 0; 4117 } 4118 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4119 } else if (csin_type_simple) { 4120 PetscMPIInt rank; 4121 PetscInt issize,isidx; 4122 4123 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4124 if (im_active) { 4125 issize = 1; 4126 isidx = (PetscInt)rank; 4127 } else { 4128 issize = 0; 4129 isidx = -1; 4130 } 4131 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4132 } else { /* get a suitable subassembling pattern from MATIS code */ 4133 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4134 } 4135 4136 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4137 if (!redist || ncoarse <= void_procs) { 4138 PetscInt ncoarse_cand,tissize,*nisindices; 4139 PetscInt *coarse_candidates; 4140 const PetscInt* tisindices; 4141 4142 /* get coarse candidates' ranks in pc communicator */ 4143 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4144 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4145 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4146 if (!coarse_candidates[i]) { 4147 coarse_candidates[ncoarse_cand++]=i; 4148 } 4149 } 4150 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4151 4152 4153 if (pcbddc->dbg_flag) { 4154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4156 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4158 for (i=0;i<ncoarse_cand;i++) { 4159 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4160 } 4161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4163 } 4164 /* shift the pattern on coarse candidates */ 4165 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4166 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4167 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4168 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4169 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4170 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4171 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4172 } 4173 if (pcbddc->dbg_flag) { 4174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4175 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4176 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4177 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4178 } 4179 } 4180 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4181 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4182 } else { 4183 if (pcbddc->dbg_flag) { 4184 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4187 } 4188 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4189 coarse_mat_is = t_coarse_mat_is; 4190 } 4191 4192 /* create local to global scatters for coarse problem */ 4193 if (compute_vecs) { 4194 PetscInt lrows; 4195 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4196 if (coarse_mat_is) { 4197 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4198 } else { 4199 lrows = 0; 4200 } 4201 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4202 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4203 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4204 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4205 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4206 } 4207 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4208 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4209 4210 /* set defaults for coarse KSP and PC */ 4211 if (multilevel_allowed) { 4212 coarse_ksp_type = KSPRICHARDSON; 4213 coarse_pc_type = PCBDDC; 4214 } else { 4215 coarse_ksp_type = KSPPREONLY; 4216 coarse_pc_type = PCREDUNDANT; 4217 } 4218 4219 /* print some info if requested */ 4220 if (pcbddc->dbg_flag) { 4221 if (!multilevel_allowed) { 4222 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4223 if (multilevel_requested) { 4224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4225 } else if (pcbddc->max_levels) { 4226 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4227 } 4228 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4229 } 4230 } 4231 4232 /* create the coarse KSP object only once with defaults */ 4233 if (coarse_mat_is) { 4234 MatReuse coarse_mat_reuse; 4235 PetscViewer dbg_viewer = NULL; 4236 if (pcbddc->dbg_flag) { 4237 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4238 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4239 } 4240 if (!pcbddc->coarse_ksp) { 4241 char prefix[256],str_level[16]; 4242 size_t len; 4243 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4244 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4245 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4246 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4247 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4248 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4249 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4250 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4251 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4252 /* prefix */ 4253 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4254 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4255 if (!pcbddc->current_level) { 4256 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4257 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4258 } else { 4259 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4260 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4261 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4262 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4263 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4264 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4265 } 4266 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4267 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4268 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4269 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4270 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4271 /* allow user customization */ 4272 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4273 } 4274 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4275 if (nisdofs) { 4276 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4277 for (i=0;i<nisdofs;i++) { 4278 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4279 } 4280 } 4281 if (nisneu) { 4282 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4283 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4284 } 4285 4286 /* get some info after set from options */ 4287 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4288 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4289 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4290 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4291 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4292 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4293 isbddc = PETSC_FALSE; 4294 } 4295 if (isredundant) { 4296 KSP inner_ksp; 4297 PC inner_pc; 4298 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4299 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4300 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4301 } 4302 4303 /* assemble coarse matrix */ 4304 if (coarse_reuse) { 4305 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4306 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4307 coarse_mat_reuse = MAT_REUSE_MATRIX; 4308 } else { 4309 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4310 } 4311 if (isbddc || isnn) { 4312 if (pcbddc->coarsening_ratio > 1) { 4313 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4314 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4315 if (pcbddc->dbg_flag) { 4316 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4317 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4318 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4319 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4320 } 4321 } 4322 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4323 } else { 4324 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4325 coarse_mat = coarse_mat_is; 4326 } 4327 } else { 4328 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4329 } 4330 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4331 4332 /* propagate symmetry info to coarse matrix */ 4333 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4334 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4335 4336 /* set operators */ 4337 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4338 if (pcbddc->dbg_flag) { 4339 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4340 } 4341 } else { /* processes non partecipating to coarse solver (if any) */ 4342 coarse_mat = 0; 4343 } 4344 ierr = PetscFree(isarray);CHKERRQ(ierr); 4345 #if 0 4346 { 4347 PetscViewer viewer; 4348 char filename[256]; 4349 sprintf(filename,"coarse_mat.m"); 4350 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4351 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4352 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4353 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4354 } 4355 #endif 4356 4357 /* Compute coarse null space (special handling by BDDC only) */ 4358 if (pcbddc->NullSpace) { 4359 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4360 } 4361 4362 if (pcbddc->coarse_ksp) { 4363 Vec crhs,csol; 4364 PetscBool ispreonly; 4365 if (CoarseNullSpace) { 4366 if (isbddc) { 4367 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4368 } else { 4369 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4370 } 4371 } 4372 /* setup coarse ksp */ 4373 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4374 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4375 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4376 /* hack */ 4377 if (!csol) { 4378 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4379 } 4380 if (!crhs) { 4381 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4382 } 4383 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4384 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4385 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4386 KSP check_ksp; 4387 KSPType check_ksp_type; 4388 PC check_pc; 4389 Vec check_vec,coarse_vec; 4390 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4391 PetscInt its; 4392 PetscBool compute_eigs; 4393 PetscReal *eigs_r,*eigs_c; 4394 PetscInt neigs; 4395 const char *prefix; 4396 4397 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4398 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4399 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4400 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4401 if (ispreonly) { 4402 check_ksp_type = KSPPREONLY; 4403 compute_eigs = PETSC_FALSE; 4404 } else { 4405 check_ksp_type = KSPGMRES; 4406 compute_eigs = PETSC_TRUE; 4407 } 4408 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4409 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4410 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4411 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4412 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4413 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4414 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4415 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4416 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4417 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4418 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4419 /* create random vec */ 4420 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4421 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4422 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4423 if (CoarseNullSpace) { 4424 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4425 } 4426 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4427 /* solve coarse problem */ 4428 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4429 if (CoarseNullSpace) { 4430 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4431 } 4432 /* set eigenvalue estimation if preonly has not been requested */ 4433 if (compute_eigs) { 4434 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4435 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4436 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4437 lambda_max = eigs_r[neigs-1]; 4438 lambda_min = eigs_r[0]; 4439 if (pcbddc->use_coarse_estimates) { 4440 if (lambda_max>lambda_min) { 4441 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4442 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4443 } 4444 } 4445 } 4446 4447 /* check coarse problem residual error */ 4448 if (pcbddc->dbg_flag) { 4449 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4450 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4451 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4452 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4453 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4454 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4455 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4457 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4458 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4459 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4460 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4461 if (compute_eigs) { 4462 PetscReal lambda_max_s,lambda_min_s; 4463 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4464 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4465 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4466 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4467 for (i=0;i<neigs;i++) { 4468 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4469 } 4470 } 4471 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4472 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4473 } 4474 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4475 if (compute_eigs) { 4476 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4477 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4478 } 4479 } 4480 } 4481 /* print additional info */ 4482 if (pcbddc->dbg_flag) { 4483 /* waits until all processes reaches this point */ 4484 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4485 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4487 } 4488 4489 /* free memory */ 4490 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4491 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4492 PetscFunctionReturn(0); 4493 } 4494 4495 #undef __FUNCT__ 4496 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4497 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4498 { 4499 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4500 PC_IS* pcis = (PC_IS*)pc->data; 4501 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4502 PetscInt i,coarse_size=0; 4503 PetscInt *local_primal_indices=NULL; 4504 PetscErrorCode ierr; 4505 4506 PetscFunctionBegin; 4507 /* Compute global number of coarse dofs */ 4508 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4509 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4510 } 4511 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4512 4513 /* check numbering */ 4514 if (pcbddc->dbg_flag) { 4515 PetscScalar coarsesum,*array; 4516 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4517 4518 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4519 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4520 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4521 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4522 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4523 for (i=0;i<pcbddc->local_primal_size;i++) { 4524 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4525 } 4526 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4527 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4528 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4529 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4530 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4531 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4532 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4533 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4534 for (i=0;i<pcis->n;i++) { 4535 if (array[i] == 1.0) { 4536 set_error = PETSC_TRUE; 4537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4538 } 4539 } 4540 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4542 for (i=0;i<pcis->n;i++) { 4543 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4544 } 4545 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4546 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4547 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4548 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4549 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4550 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4551 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4552 PetscInt *gidxs; 4553 4554 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4555 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4556 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4557 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4559 for (i=0;i<pcbddc->local_primal_size;i++) { 4560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4561 } 4562 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4563 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4564 } 4565 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4566 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4567 } 4568 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4569 /* get back data */ 4570 *coarse_size_n = coarse_size; 4571 *local_primal_indices_n = local_primal_indices; 4572 PetscFunctionReturn(0); 4573 } 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "PCBDDCGlobalToLocal" 4577 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4578 { 4579 IS localis_t; 4580 PetscInt i,lsize,*idxs,n; 4581 PetscScalar *vals; 4582 PetscErrorCode ierr; 4583 4584 PetscFunctionBegin; 4585 /* get indices in local ordering exploiting local to global map */ 4586 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4587 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4588 for (i=0;i<lsize;i++) vals[i] = 1.0; 4589 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4590 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4591 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4592 if (idxs) { /* multilevel guard */ 4593 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4594 } 4595 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4596 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4597 ierr = PetscFree(vals);CHKERRQ(ierr); 4598 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4599 /* now compute set in local ordering */ 4600 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4601 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4602 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4603 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4604 for (i=0,lsize=0;i<n;i++) { 4605 if (PetscRealPart(vals[i]) > 0.5) { 4606 lsize++; 4607 } 4608 } 4609 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4610 for (i=0,lsize=0;i<n;i++) { 4611 if (PetscRealPart(vals[i]) > 0.5) { 4612 idxs[lsize++] = i; 4613 } 4614 } 4615 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4616 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4617 *localis = localis_t; 4618 PetscFunctionReturn(0); 4619 } 4620 4621 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4622 #undef __FUNCT__ 4623 #define __FUNCT__ "PCBDDCMatMult_Private" 4624 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4625 { 4626 PCBDDCChange_ctx change_ctx; 4627 PetscErrorCode ierr; 4628 4629 PetscFunctionBegin; 4630 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4631 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4632 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4633 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4634 PetscFunctionReturn(0); 4635 } 4636 4637 #undef __FUNCT__ 4638 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4639 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4640 { 4641 PCBDDCChange_ctx change_ctx; 4642 PetscErrorCode ierr; 4643 4644 PetscFunctionBegin; 4645 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4646 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4647 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4648 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4649 PetscFunctionReturn(0); 4650 } 4651 4652 #undef __FUNCT__ 4653 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4654 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4655 { 4656 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4657 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4658 PetscInt *used_xadj,*used_adjncy; 4659 PetscBool free_used_adj; 4660 PetscErrorCode ierr; 4661 4662 PetscFunctionBegin; 4663 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4664 free_used_adj = PETSC_FALSE; 4665 if (pcbddc->sub_schurs_layers == -1) { 4666 used_xadj = NULL; 4667 used_adjncy = NULL; 4668 } else { 4669 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4670 used_xadj = pcbddc->mat_graph->xadj; 4671 used_adjncy = pcbddc->mat_graph->adjncy; 4672 } else if (pcbddc->computed_rowadj) { 4673 used_xadj = pcbddc->mat_graph->xadj; 4674 used_adjncy = pcbddc->mat_graph->adjncy; 4675 } else { 4676 PetscBool flg_row=PETSC_FALSE; 4677 const PetscInt *xadj,*adjncy; 4678 PetscInt nvtxs; 4679 4680 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4681 if (flg_row) { 4682 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4683 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4684 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4685 free_used_adj = PETSC_TRUE; 4686 } else { 4687 pcbddc->sub_schurs_layers = -1; 4688 used_xadj = NULL; 4689 used_adjncy = NULL; 4690 } 4691 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4692 } 4693 } 4694 4695 /* set Schur complement solver for interior variables (used only when MUMPS is not present) */ 4696 if (!sub_schurs->use_mumps) { 4697 ierr = MatSchurComplementSetKSP(sub_schurs->S,pcbddc->ksp_D);CHKERRQ(ierr); 4698 } 4699 4700 /* setup sub_schurs data */ 4701 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4702 4703 /* free adjacency */ 4704 if (free_used_adj) { 4705 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4706 } 4707 PetscFunctionReturn(0); 4708 } 4709 4710 #undef __FUNCT__ 4711 #define __FUNCT__ "PCBDDCInitSubSchurs" 4712 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4713 { 4714 PC_IS *pcis=(PC_IS*)pc->data; 4715 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4716 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4717 PCBDDCGraph graph; 4718 Mat S_j; 4719 PetscErrorCode ierr; 4720 4721 PetscFunctionBegin; 4722 /* attach interface graph for determining subsets */ 4723 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4724 IS verticesIS; 4725 4726 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4727 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4728 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4729 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4730 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4731 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4732 /* 4733 if (pcbddc->dbg_flag) { 4734 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4735 } 4736 */ 4737 } else { 4738 graph = pcbddc->mat_graph; 4739 } 4740 4741 /* Create Schur complement matrix */ 4742 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4743 4744 /* sub_schurs init */ 4745 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4746 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4747 /* free graph struct */ 4748 if (pcbddc->sub_schurs_rebuild) { 4749 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4750 } 4751 PetscFunctionReturn(0); 4752 } 4753