1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 60 nmax = PetscMax(nmin,nmax); 61 62 /* allocate lapack workspace */ 63 cum = cum2 = 0; 64 maxneigs = 0; 65 for (i=0;i<sub_schurs->n_subs;i++) { 66 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 67 PetscInt n,subset_size; 68 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 69 n = PetscMin(subset_size,nmax); 70 cum += subset_size*n; 71 cum2 += n; 72 maxneigs = PetscMax(maxneigs,n); 73 } 74 } 75 76 if (mss) { 77 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 78 PetscBLASInt B_itype = 1; 79 PetscBLASInt B_N = mss; 80 PetscReal zero = 0.0; 81 PetscReal eps = 0.0; /* dlamch? */ 82 83 B_lwork = -1; 84 S = NULL; 85 St = NULL; 86 eigs = NULL; 87 eigv = NULL; 88 B_iwork = NULL; 89 B_ifail = NULL; 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 108 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 112 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 113 #if defined(PETSC_USE_COMPLEX) 114 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 115 #endif 116 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 117 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 118 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 119 nv+cum,&pcbddc->adaptive_constraints_idxs, 120 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 121 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 122 123 maxneigs = 0; 124 cum = cum2 = cumarray = 0; 125 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 126 const PetscInt *idxs; 127 128 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 for (cum=0;cum<nv;cum++) { 130 pcbddc->adaptive_constraints_n[cum] = 1; 131 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 132 pcbddc->adaptive_constraints_ptrs[cum] = cum; 133 pcbddc->adaptive_constraints_data[cum] = 1.0; 134 } 135 cum2 = cum; 136 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 } 138 139 if (mss) { /* multilevel */ 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 141 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 142 } 143 144 for (i=0;i<sub_schurs->n_subs;i++) { 145 PetscInt j,subset_size; 146 147 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 148 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 149 const PetscInt *idxs; 150 PetscScalar one = 1.0,scalar_zero = 0.0; 151 PetscReal zero=0.0; 152 PetscBLASInt B_N; 153 154 /* S should be copied since we need it for deluxe scaling */ 155 if (sub_schurs->is_hermitian) { 156 PetscInt j; 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 for (j=0;j<subset_size;j++) { 161 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 162 } 163 } else { 164 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 /* is always this the right matrix? */ 168 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 170 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 171 /* St = Starray+cumarray; */ 172 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 173 if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */ 174 thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold); 175 } else { 176 thresh = 1.0; 177 } 178 179 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 180 PetscBLASInt B_itype = 1; 181 PetscBLASInt B_IL = 1, B_IU; 182 PetscReal eps = -1.0; /* dlamch? */ 183 PetscInt nmin_s; 184 185 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 186 /* ask for eigenvalues lower than thresh */ 187 if (pcbddc->dbg_flag) { 188 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 189 } 190 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 191 #if defined(PETSC_USE_COMPLEX) 192 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 193 #else 194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 195 #endif 196 ierr = PetscFPTrapPop();CHKERRQ(ierr); 197 if (B_ierr) { 198 if (B_ierr < 0 ) { 199 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 200 } else if (B_ierr <= B_N) { 201 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 202 } else { 203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 204 } 205 } 206 207 if (B_neigs > nmax) { 208 if (pcbddc->dbg_flag) { 209 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 210 } 211 B_neigs = nmax; 212 } 213 214 nmin_s = PetscMin(nmin,B_N); 215 if (B_neigs < nmin_s) { 216 PetscBLASInt B_neigs2; 217 218 B_IL = B_neigs + 1; 219 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 220 if (pcbddc->dbg_flag) { 221 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 222 } 223 if (sub_schurs->is_hermitian) { 224 PetscInt j; 225 for (j=0;j<subset_size;j++) { 226 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 227 } 228 for (j=0;j<subset_size;j++) { 229 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 230 } 231 } else { 232 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 233 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 236 #if defined(PETSC_USE_COMPLEX) 237 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 238 #else 239 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 240 #endif 241 ierr = PetscFPTrapPop();CHKERRQ(ierr); 242 B_neigs += B_neigs2; 243 } 244 if (B_ierr) { 245 if (B_ierr < 0 ) { 246 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 247 } else if (B_ierr <= B_N) { 248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 249 } else { 250 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 251 } 252 } 253 if (pcbddc->dbg_flag) { 254 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 255 for (j=0;j<B_neigs;j++) { 256 if (eigs[j] == 0.0) { 257 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 258 } else { 259 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 260 } 261 } 262 } 263 } else { 264 /* TODO */ 265 } 266 267 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 268 if (B_neigs) { 269 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 270 } 271 maxneigs = PetscMax(B_neigs,maxneigs); 272 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 273 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 274 275 if (pcbddc->dbg_flag > 1) { 276 PetscInt ii; 277 for (ii=0;ii<B_neigs;ii++) { 278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 279 for (j=0;j<B_N;j++) { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 281 } 282 } 283 } 284 for (j=0;j<B_neigs;j++) { 285 #if 0 286 { 287 PetscBLASInt Blas_N,Blas_one = 1.0; 288 PetscScalar norm; 289 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 290 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 291 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 292 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 293 } else { 294 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 295 } 296 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 297 } 298 #endif 299 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 300 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 301 cum2 += subset_size; 302 } 303 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 304 } 305 /* shift for next computation */ 306 cumarray += subset_size*subset_size; 307 } 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 } 311 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 312 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 313 314 if (mss) { 315 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 316 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 317 } 318 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 319 #if defined(PETSC_USE_COMPLEX) 320 ierr = PetscFree(rwork);CHKERRQ(ierr); 321 #endif 322 if (pcbddc->dbg_flag) { 323 PetscInt maxneigs_r; 324 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 325 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 326 } 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "PCBDDCSetUpSolvers" 332 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 333 { 334 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 335 PetscScalar *coarse_submat_vals; 336 PetscErrorCode ierr; 337 338 PetscFunctionBegin; 339 /* Setup local scatters R_to_B and (optionally) R_to_D */ 340 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 341 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 342 343 /* Setup local neumann solver ksp_R */ 344 /* PCBDDCSetUpLocalScatters should be called first! */ 345 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 346 347 /* Change global null space passed in by the user if change of basis has been requested */ 348 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 349 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 350 } 351 352 /* 353 Setup local correction and local part of coarse basis. 354 Gives back the dense local part of the coarse matrix in column major ordering 355 */ 356 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 357 358 /* Compute total number of coarse nodes and setup coarse solver */ 359 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 360 361 /* free */ 362 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "PCBDDCResetCustomization" 368 PetscErrorCode PCBDDCResetCustomization(PC pc) 369 { 370 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 375 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 376 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 377 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 378 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 379 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 380 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 381 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 383 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 384 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetTopography" 390 PetscErrorCode PCBDDCResetTopography(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 397 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 398 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 399 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetSolvers" 406 PetscErrorCode PCBDDCResetSolvers(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscScalar *array; 410 PetscErrorCode ierr; 411 412 PetscFunctionBegin; 413 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 414 if (pcbddc->coarse_phi_B) { 415 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 416 ierr = PetscFree(array);CHKERRQ(ierr); 417 } 418 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 421 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 422 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 423 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 424 if (pcbddc->local_auxmat2) { 425 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 426 ierr = PetscFree(array);CHKERRQ(ierr); 427 } 428 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 430 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 432 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 433 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 434 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 435 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 436 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 437 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 438 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 439 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 440 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 441 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 442 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 443 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 449 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 450 { 451 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 452 PC_IS *pcis = (PC_IS*)pc->data; 453 VecType impVecType; 454 PetscInt n_constraints,n_R,old_size; 455 PetscErrorCode ierr; 456 457 PetscFunctionBegin; 458 if (!pcbddc->ConstraintMatrix) { 459 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 460 } 461 /* get sizes */ 462 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 463 n_R = pcis->n-pcbddc->n_actual_vertices; 464 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 465 /* local work vectors (try to avoid unneeded work)*/ 466 /* R nodes */ 467 old_size = -1; 468 if (pcbddc->vec1_R) { 469 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 470 } 471 if (n_R != old_size) { 472 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 473 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 474 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 475 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 476 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 477 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 478 } 479 /* local primal dofs */ 480 old_size = -1; 481 if (pcbddc->vec1_P) { 482 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 483 } 484 if (pcbddc->local_primal_size != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 486 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 487 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 488 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 489 } 490 /* local explicit constraints */ 491 old_size = -1; 492 if (pcbddc->vec1_C) { 493 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 494 } 495 if (n_constraints && n_constraints != old_size) { 496 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 497 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 498 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 499 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCBDDCSetUpCorrection" 506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 507 { 508 PetscErrorCode ierr; 509 /* pointers to pcis and pcbddc */ 510 PC_IS* pcis = (PC_IS*)pc->data; 511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 512 /* submatrices of local problem */ 513 Mat A_RV,A_VR,A_VV; 514 /* submatrices of local coarse problem */ 515 Mat S_VV,S_CV,S_VC,S_CC; 516 /* working matrices */ 517 Mat C_CR; 518 /* additional working stuff */ 519 PC pc_R; 520 Mat F; 521 PetscBool isLU,isCHOL,isILU; 522 523 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 524 PetscScalar *work; 525 PetscInt *idx_V_B; 526 PetscInt n,n_vertices,n_constraints; 527 PetscInt i,n_R,n_D,n_B; 528 PetscBool unsymmetric_check; 529 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 530 MatType impMatType; 531 /* some shortcuts to scalars */ 532 PetscScalar one=1.0,m_one=-1.0; 533 534 PetscFunctionBegin; 535 /* get number of vertices (corners plus constraints with change of basis) 536 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 537 n_vertices = pcbddc->n_actual_vertices; 538 n_constraints = pcbddc->local_primal_size-n_vertices; 539 /* Set Non-overlapping dimensions */ 540 n_B = pcis->n_B; n_D = pcis->n - n_B; 541 n_R = pcis->n-n_vertices; 542 543 /* Set types for local objects needed by BDDC precondtioner */ 544 impMatType = MATSEQDENSE; 545 546 /* vertices in boundary numbering */ 547 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 548 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 549 if (i != n_vertices) { 550 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 551 } 552 553 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 554 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 556 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 558 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 563 564 unsymmetric_check = PETSC_FALSE; 565 /* allocate workspace */ 566 n = 0; 567 if (n_constraints) { 568 n += n_R*n_constraints; 569 } 570 if (n_vertices) { 571 n = PetscMax(2*n_R*n_vertices,n); 572 } 573 if (!pcbddc->issym) { 574 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 575 unsymmetric_check = PETSC_TRUE; 576 } 577 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 578 579 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 580 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 581 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 582 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 583 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 584 if (isLU || isILU || isCHOL) { 585 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 586 } else { 587 F = NULL; 588 } 589 590 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 591 if (n_constraints) { 592 Mat M1,M2,M3; 593 IS is_aux; 594 /* see if we can save some allocations */ 595 if (pcbddc->local_auxmat2) { 596 PetscInt on_R,on_constraints; 597 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 598 if (on_R != n_R || on_constraints != n_constraints) { 599 PetscScalar *marray; 600 601 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 602 ierr = PetscFree(marray);CHKERRQ(ierr); 603 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 605 } 606 } 607 /* auxiliary matrices */ 608 if (!pcbddc->local_auxmat2) { 609 PetscScalar *marray; 610 611 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 marray += n_R*n_constraints; 614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 615 } 616 617 /* Extract constraints on R nodes: C_{CR} */ 618 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 619 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 620 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 621 622 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 623 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 624 for (i=0;i<n_constraints;i++) { 625 const PetscScalar *row_cmat_values; 626 const PetscInt *row_cmat_indices; 627 PetscInt size_of_constraint,j; 628 629 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 630 for (j=0;j<size_of_constraint;j++) { 631 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 632 } 633 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 634 } 635 if (F) { 636 Mat B; 637 638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 639 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 640 ierr = MatDestroy(&B);CHKERRQ(ierr); 641 } else { 642 PetscScalar *xarray; 643 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 652 } 653 654 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 655 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 656 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 657 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 658 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 659 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 660 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 661 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 662 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 663 ierr = MatDestroy(&M2);CHKERRQ(ierr); 664 ierr = MatDestroy(&M3);CHKERRQ(ierr); 665 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 666 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 667 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 668 ierr = MatDestroy(&M1);CHKERRQ(ierr); 669 } 670 /* Get submatrices from subdomain matrix */ 671 if (n_vertices) { 672 Mat newmat; 673 IS is_aux; 674 PetscInt ibs,mbs; 675 PetscBool issbaij; 676 677 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 678 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 679 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 680 if (ibs != mbs) { /* need to convert to SEQAIJ */ 681 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 682 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 683 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 684 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 685 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 686 } else { 687 /* this is safe */ 688 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 690 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 691 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 692 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 693 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 694 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 695 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 696 } else { 697 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 698 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 699 } 700 } 701 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 702 } 703 704 /* Matrix of coarse basis functions (local) */ 705 if (pcbddc->coarse_phi_B) { 706 PetscInt on_B,on_primal,on_D=n_D; 707 if (pcbddc->coarse_phi_D) { 708 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 709 } 710 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 711 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 712 PetscScalar *marray; 713 714 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 715 ierr = PetscFree(marray);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 720 } 721 } 722 723 if (!pcbddc->coarse_phi_B) { 724 PetscScalar *marray; 725 726 n = n_B*pcbddc->local_primal_size; 727 if (pcbddc->switch_static || pcbddc->dbg_flag) { 728 n += n_D*pcbddc->local_primal_size; 729 } 730 if (!pcbddc->issym) { 731 n *= 2; 732 } 733 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 735 n = n_B*pcbddc->local_primal_size; 736 if (pcbddc->switch_static || pcbddc->dbg_flag) { 737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->issym) { 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 743 n = n_B*pcbddc->local_primal_size; 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 745 } 746 } else { 747 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 748 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 749 if (pcbddc->switch_static || pcbddc->dbg_flag) { 750 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 751 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 752 } 753 } 754 } 755 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 756 /* vertices */ 757 if (n_vertices) { 758 759 if (n_R) { 760 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 761 PetscBLASInt B_N,B_one = 1; 762 PetscScalar *x,*y; 763 764 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 766 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 767 if (F) { 768 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 769 } else { 770 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 771 for (i=0;i<n_vertices;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 773 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 774 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 775 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 776 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 777 } 778 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 779 } 780 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 781 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 782 if (n_constraints) { 783 Mat B; 784 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 786 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 787 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 788 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 789 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 790 ierr = MatDestroy(&B);CHKERRQ(ierr); 791 } 792 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 793 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 794 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 795 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 796 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 797 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 798 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 799 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 800 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 801 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 802 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 803 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 804 } else { 805 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 806 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 807 } 808 /* coarse basis functions */ 809 for (i=0;i<n_vertices;i++) { 810 PetscScalar *y; 811 812 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 813 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 814 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 y[n_B*i+idx_V_B[i]] = 1.0; 818 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 819 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 820 821 if (pcbddc->switch_static || pcbddc->dbg_flag) { 822 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 823 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 824 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 827 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 828 } 829 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 830 } 831 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 832 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 833 } 834 835 if (n_constraints) { 836 Mat B; 837 838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 839 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 841 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 842 if (n_vertices) { 843 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 844 } 845 ierr = MatDestroy(&B);CHKERRQ(ierr); 846 /* coarse basis functions */ 847 for (i=0;i<n_constraints;i++) { 848 PetscScalar *y; 849 850 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 851 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 852 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 853 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 854 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 856 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 857 858 if (pcbddc->switch_static || pcbddc->dbg_flag) { 859 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 860 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 864 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 865 } 866 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 867 } 868 } 869 870 /* compute other basis functions for non-symmetric problems */ 871 if (!pcbddc->issym) { 872 Mat B,X; 873 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 875 876 if (n_constraints) { 877 Mat S_CCT,B_C; 878 879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 880 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 881 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 882 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 883 if (n_vertices) { 884 Mat B_V,S_VCT; 885 886 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 887 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 888 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 889 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 891 } 892 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 893 } 894 if (n_vertices && n_R) { 895 Mat A_VRT; 896 PetscBLASInt B_N,B_one = 1; 897 898 if (!n_constraints) { /* if there are no constraints, reset work */ 899 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 900 } 901 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 902 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 903 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 904 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 905 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 906 } 907 908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 909 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 910 for (i=0;i<pcbddc->local_primal_size;i++) { 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 912 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 913 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 914 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 915 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 916 } 917 } else { 918 for (i=0;i<pcbddc->local_primal_size;i++) { 919 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 921 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 923 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 924 } 925 } 926 ierr = MatDestroy(&B);CHKERRQ(ierr); 927 /* coarse basis functions */ 928 for (i=0;i<pcbddc->local_primal_size;i++) { 929 PetscScalar *y; 930 931 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 932 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 933 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 934 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 if (i<n_vertices) { 937 y[n_B*i+idx_V_B[i]] = 1.0; 938 } 939 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 940 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 941 942 if (pcbddc->switch_static || pcbddc->dbg_flag) { 943 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 944 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 945 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 948 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 949 } 950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 951 } 952 ierr = MatDestroy(&X);CHKERRQ(ierr); 953 } 954 955 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 956 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 957 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 958 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 959 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 960 /* Checking coarse_sub_mat and coarse basis functios */ 961 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 962 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 963 if (pcbddc->dbg_flag) { 964 Mat coarse_sub_mat; 965 Mat AUXMAT,TM1,TM2,TM3,TM4; 966 Mat coarse_phi_D,coarse_phi_B; 967 Mat coarse_psi_D,coarse_psi_B; 968 Mat A_II,A_BB,A_IB,A_BI; 969 Mat C_B,CPHI; 970 IS is_dummy; 971 Vec mones; 972 MatType checkmattype=MATSEQAIJ; 973 PetscReal real_value; 974 975 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 976 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 977 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 979 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 980 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 981 if (unsymmetric_check) { 982 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 983 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 984 } 985 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 986 987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 990 if (unsymmetric_check) { 991 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 992 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 993 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 994 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 995 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 996 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 997 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 998 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 999 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1001 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1002 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1003 } else { 1004 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1005 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1006 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1007 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1010 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1011 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1012 } 1013 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1014 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1015 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1017 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1018 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1019 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1021 1022 /* check constraints */ 1023 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1024 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1025 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1026 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1027 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1028 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1029 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1031 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1032 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1033 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1034 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1035 1036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1037 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1038 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1039 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1040 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1041 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1042 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1043 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1044 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1045 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1046 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1047 if (unsymmetric_check) { 1048 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1049 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1050 } 1051 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1052 } 1053 1054 /* free memory */ 1055 ierr = PetscFree(work);CHKERRQ(ierr); 1056 if (n_vertices) { 1057 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1058 } 1059 if (n_constraints) { 1060 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1061 } 1062 /* get back data */ 1063 *coarse_submat_vals_n = coarse_submat_vals; 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1069 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1070 { 1071 Mat *work_mat; 1072 IS isrow_s,iscol_s; 1073 PetscBool rsorted,csorted; 1074 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1075 PetscErrorCode ierr; 1076 1077 PetscFunctionBegin; 1078 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1079 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1080 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1081 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1082 1083 if (!rsorted) { 1084 const PetscInt *idxs; 1085 PetscInt *idxs_sorted,i; 1086 1087 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1088 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1089 for (i=0;i<rsize;i++) { 1090 idxs_perm_r[i] = i; 1091 } 1092 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1093 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1094 for (i=0;i<rsize;i++) { 1095 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1096 } 1097 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1098 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1099 } else { 1100 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1101 isrow_s = isrow; 1102 } 1103 1104 if (!csorted) { 1105 if (isrow == iscol) { 1106 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1107 iscol_s = isrow_s; 1108 } else { 1109 const PetscInt *idxs; 1110 PetscInt *idxs_sorted,i; 1111 1112 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1113 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1114 for (i=0;i<csize;i++) { 1115 idxs_perm_c[i] = i; 1116 } 1117 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1119 for (i=0;i<csize;i++) { 1120 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1121 } 1122 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1123 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1124 } 1125 } else { 1126 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1127 iscol_s = iscol; 1128 } 1129 1130 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1131 1132 if (!rsorted || !csorted) { 1133 Mat new_mat; 1134 IS is_perm_r,is_perm_c; 1135 1136 if (!rsorted) { 1137 PetscInt *idxs_r,i; 1138 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1139 for (i=0;i<rsize;i++) { 1140 idxs_r[idxs_perm_r[i]] = i; 1141 } 1142 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1143 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1144 } else { 1145 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1146 } 1147 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1148 1149 if (!csorted) { 1150 if (isrow_s == iscol_s) { 1151 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1152 is_perm_c = is_perm_r; 1153 } else { 1154 PetscInt *idxs_c,i; 1155 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1156 for (i=0;i<csize;i++) { 1157 idxs_c[idxs_perm_c[i]] = i; 1158 } 1159 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1160 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1161 } 1162 } else { 1163 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1164 } 1165 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1166 1167 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1168 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1169 work_mat[0] = new_mat; 1170 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1171 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1172 } 1173 1174 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1175 *B = work_mat[0]; 1176 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1177 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1178 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1184 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1185 { 1186 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1188 Mat new_mat; 1189 IS is_local,is_global; 1190 PetscInt local_size; 1191 PetscBool isseqaij; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1197 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1198 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1199 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1200 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1201 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1202 1203 /* check */ 1204 if (pcbddc->dbg_flag) { 1205 Vec x,x_change; 1206 PetscReal error; 1207 1208 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1209 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1210 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1211 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1214 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1216 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1217 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1220 ierr = VecDestroy(&x);CHKERRQ(ierr); 1221 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1222 } 1223 1224 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1225 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1226 if (isseqaij) { 1227 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1228 } else { 1229 Mat work_mat; 1230 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1231 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1232 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1233 } 1234 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1235 #if !defined(PETSC_USE_COMPLEX) 1236 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1237 #endif 1238 /* 1239 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1240 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1241 */ 1242 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1248 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1249 { 1250 PC_IS* pcis = (PC_IS*)(pc->data); 1251 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1252 IS is_aux1,is_aux2; 1253 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1254 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1255 PetscInt vbs,bs; 1256 PetscBT bitmask; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 /* 1261 No need to setup local scatters if 1262 - primal space is unchanged 1263 AND 1264 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1265 AND 1266 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1267 */ 1268 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1269 PetscFunctionReturn(0); 1270 } 1271 /* destroy old objects */ 1272 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1273 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1274 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1275 /* Set Non-overlapping dimensions */ 1276 n_B = pcis->n_B; n_D = pcis->n - n_B; 1277 n_vertices = pcbddc->n_actual_vertices; 1278 /* create auxiliary bitmask */ 1279 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1280 for (i=0;i<n_vertices;i++) { 1281 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1282 } 1283 1284 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1285 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1286 for (i=0, n_R=0; i<pcis->n; i++) { 1287 if (!PetscBTLookup(bitmask,i)) { 1288 idx_R_local[n_R] = i; 1289 n_R++; 1290 } 1291 } 1292 1293 /* Block code */ 1294 vbs = 1; 1295 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1296 if (bs>1 && !(n_vertices%bs)) { 1297 PetscBool is_blocked = PETSC_TRUE; 1298 PetscInt *vary; 1299 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1300 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1301 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1302 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1303 for (i=0; i<n_vertices/bs; i++) { 1304 if (vary[i]!=0 && vary[i]!=bs) { 1305 is_blocked = PETSC_FALSE; 1306 break; 1307 } 1308 } 1309 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1310 vbs = bs; 1311 for (i=0;i<n_R/vbs;i++) { 1312 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1313 } 1314 } 1315 ierr = PetscFree(vary);CHKERRQ(ierr); 1316 } 1317 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1318 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1319 1320 /* print some info if requested */ 1321 if (pcbddc->dbg_flag) { 1322 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1326 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1330 } 1331 1332 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1333 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1334 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1335 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1336 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1337 for (i=0; i<n_D; i++) { 1338 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1339 } 1340 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1341 for (i=0, j=0; i<n_R; i++) { 1342 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1343 aux_array1[j++] = i; 1344 } 1345 } 1346 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1347 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1348 for (i=0, j=0; i<n_B; i++) { 1349 if (!PetscBTLookup(bitmask,is_indices[i])) { 1350 aux_array2[j++] = i; 1351 } 1352 } 1353 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1355 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1356 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1357 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1358 1359 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1360 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1361 for (i=0, j=0; i<n_R; i++) { 1362 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1363 aux_array1[j++] = i; 1364 } 1365 } 1366 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1367 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1368 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1369 } 1370 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1371 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1378 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1379 { 1380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1381 PC_IS *pcis = (PC_IS*)pc->data; 1382 PC pc_temp; 1383 Mat A_RR; 1384 MatReuse reuse; 1385 PetscScalar m_one = -1.0; 1386 PetscReal value; 1387 PetscInt n_D,n_R,ibs,mbs; 1388 PetscBool use_exact,use_exact_reduced,issbaij; 1389 PetscErrorCode ierr; 1390 /* prefixes stuff */ 1391 char dir_prefix[256],neu_prefix[256],str_level[16]; 1392 size_t len; 1393 1394 PetscFunctionBegin; 1395 1396 /* compute prefixes */ 1397 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1398 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1399 if (!pcbddc->current_level) { 1400 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1401 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1402 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1403 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1406 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1407 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1408 len -= 15; /* remove "pc_bddc_coarse_" */ 1409 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1410 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1411 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1412 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1413 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1414 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1415 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1416 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1417 } 1418 1419 /* DIRICHLET PROBLEM */ 1420 if (dirichlet) { 1421 if (pcbddc->issym) { 1422 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1423 } 1424 /* Matrix for Dirichlet problem is pcis->A_II */ 1425 n_D = pcis->n - pcis->n_B; 1426 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1427 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1428 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1429 /* default */ 1430 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1431 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1433 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1434 if (issbaij) { 1435 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1436 } else { 1437 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1438 } 1439 /* Allow user's customization */ 1440 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1441 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1442 } 1443 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1444 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1445 if (!n_D) { 1446 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1447 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1448 } 1449 /* Set Up KSP for Dirichlet problem of BDDC */ 1450 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1451 /* set ksp_D into pcis data */ 1452 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1453 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1454 pcis->ksp_D = pcbddc->ksp_D; 1455 } 1456 1457 /* NEUMANN PROBLEM */ 1458 A_RR = 0; 1459 if (neumann) { 1460 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1461 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1462 if (pcbddc->ksp_R) { /* already created ksp */ 1463 PetscInt nn_R; 1464 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1465 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1466 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1467 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1468 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1469 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1470 reuse = MAT_INITIAL_MATRIX; 1471 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1472 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1473 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1474 reuse = MAT_INITIAL_MATRIX; 1475 } else { /* safe to reuse the matrix */ 1476 reuse = MAT_REUSE_MATRIX; 1477 } 1478 } 1479 /* last check */ 1480 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1481 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1482 reuse = MAT_INITIAL_MATRIX; 1483 } 1484 } else { /* first time, so we need to create the matrix */ 1485 reuse = MAT_INITIAL_MATRIX; 1486 } 1487 /* extract A_RR */ 1488 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1489 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1490 if (ibs != mbs) { 1491 Mat newmat; 1492 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1493 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1494 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1495 } else { 1496 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1497 } 1498 if (pcbddc->issym) { 1499 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1500 } 1501 if (!pcbddc->ksp_R) { /* create object if not present */ 1502 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1503 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1504 /* default */ 1505 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1506 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1507 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1508 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1509 if (issbaij) { 1510 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1511 } else { 1512 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1513 } 1514 /* Allow user's customization */ 1515 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1516 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1517 } 1518 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1519 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1520 if (!n_R) { 1521 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1522 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1523 } 1524 /* Set Up KSP for Neumann problem of BDDC */ 1525 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1526 } 1527 1528 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1529 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1530 if (pcbddc->dbg_flag) { 1531 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1532 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1533 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1534 } 1535 if (dirichlet) { /* Dirichlet */ 1536 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1537 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1538 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1539 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1540 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1541 /* need to be adapted? */ 1542 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1543 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1544 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1545 /* print info */ 1546 if (pcbddc->dbg_flag) { 1547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1549 } 1550 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1551 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1552 } 1553 } 1554 if (neumann) { /* Neumann */ 1555 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1556 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1557 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1558 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1559 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1560 /* need to be adapted? */ 1561 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1562 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1563 /* print info */ 1564 if (pcbddc->dbg_flag) { 1565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1567 } 1568 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1569 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1570 } 1571 } 1572 } 1573 /* free Neumann problem's matrix */ 1574 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1580 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1581 { 1582 PetscErrorCode ierr; 1583 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1584 1585 PetscFunctionBegin; 1586 if (applytranspose) { 1587 if (pcbddc->local_auxmat1) { 1588 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1589 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1590 } 1591 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1592 } else { 1593 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1594 if (pcbddc->local_auxmat1) { 1595 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1596 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1597 } 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1605 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1606 { 1607 PetscErrorCode ierr; 1608 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1609 PC_IS* pcis = (PC_IS*) (pc->data); 1610 const PetscScalar zero = 0.0; 1611 1612 PetscFunctionBegin; 1613 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1614 if (applytranspose) { 1615 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1616 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1617 } else { 1618 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1619 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1620 } 1621 /* start communications from local primal nodes to rhs of coarse solver */ 1622 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1623 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 1626 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1627 /* TODO remove null space when doing multilevel */ 1628 if (pcbddc->coarse_ksp) { 1629 Vec rhs,sol; 1630 1631 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1632 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1633 if (applytranspose) { 1634 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1635 } else { 1636 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1637 } 1638 } 1639 1640 /* Local solution on R nodes */ 1641 if (pcis->n) { 1642 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1643 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1645 if (pcbddc->switch_static) { 1646 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1648 } 1649 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1650 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1651 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 if (pcbddc->switch_static) { 1654 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 } 1657 } 1658 1659 /* communications from coarse sol to local primal nodes */ 1660 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1661 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1662 1663 /* Sum contributions from two levels */ 1664 if (applytranspose) { 1665 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1666 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1667 } else { 1668 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1669 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1676 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1677 { 1678 PetscErrorCode ierr; 1679 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1680 PetscScalar *array; 1681 Vec from,to; 1682 1683 PetscFunctionBegin; 1684 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1685 from = pcbddc->coarse_vec; 1686 to = pcbddc->vec1_P; 1687 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1688 Vec tvec; 1689 1690 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1691 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1692 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1693 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1694 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1695 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1696 } 1697 } else { /* from local to global -> put data in coarse right hand side */ 1698 from = pcbddc->vec1_P; 1699 to = pcbddc->coarse_vec; 1700 } 1701 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1707 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1708 { 1709 PetscErrorCode ierr; 1710 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1711 PetscScalar *array; 1712 Vec from,to; 1713 1714 PetscFunctionBegin; 1715 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1716 from = pcbddc->coarse_vec; 1717 to = pcbddc->vec1_P; 1718 } else { /* from local to global -> put data in coarse right hand side */ 1719 from = pcbddc->vec1_P; 1720 to = pcbddc->coarse_vec; 1721 } 1722 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1723 if (smode == SCATTER_FORWARD) { 1724 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1725 Vec tvec; 1726 1727 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1728 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1729 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1730 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1731 } 1732 } else { 1733 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1734 ierr = VecResetArray(from);CHKERRQ(ierr); 1735 } 1736 } 1737 PetscFunctionReturn(0); 1738 } 1739 1740 /* uncomment for testing purposes */ 1741 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1744 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1745 { 1746 PetscErrorCode ierr; 1747 PC_IS* pcis = (PC_IS*)(pc->data); 1748 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1749 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1750 /* one and zero */ 1751 PetscScalar one=1.0,zero=0.0; 1752 /* space to store constraints and their local indices */ 1753 PetscScalar *temp_quadrature_constraint; 1754 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1755 /* iterators */ 1756 PetscInt i,j,k,total_counts,temp_start_ptr; 1757 /* BLAS integers */ 1758 PetscBLASInt lwork,lierr; 1759 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1760 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1761 /* reuse */ 1762 PetscInt olocal_primal_size; 1763 PetscInt *oprimal_indices_local_idxs; 1764 /* change of basis */ 1765 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1766 PetscBool boolforchange,qr_needed; 1767 PetscBT touched,change_basis,qr_needed_idx; 1768 /* auxiliary stuff */ 1769 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1770 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1771 PetscScalar *temp_quadrature_constraint_work=NULL; 1772 /* some quantities */ 1773 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1774 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1775 1776 PetscFunctionBegin; 1777 /* Destroy Mat objects computed previously */ 1778 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1779 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1780 1781 /* print some info */ 1782 if (pcbddc->dbg_flag) { 1783 IS vertices; 1784 PetscInt nv,nedges,nfaces; 1785 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1786 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1787 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1788 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1792 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1793 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1794 } 1795 1796 if (!pcbddc->adaptive_selection) { 1797 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1798 MatNullSpace nearnullsp; 1799 const Vec *nearnullvecs; 1800 Vec *localnearnullsp; 1801 PetscScalar *array; 1802 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1803 PetscBool nnsp_has_cnst; 1804 /* LAPACK working arrays for SVD or POD */ 1805 PetscBool skip_lapack; 1806 PetscScalar *work; 1807 PetscReal *singular_vals; 1808 #if defined(PETSC_USE_COMPLEX) 1809 PetscReal *rwork; 1810 #endif 1811 #if defined(PETSC_MISSING_LAPACK_GESVD) 1812 PetscScalar *temp_basis,*correlation_mat; 1813 #else 1814 PetscBLASInt dummy_int=1; 1815 PetscScalar dummy_scalar=1.; 1816 #endif 1817 1818 /* Get index sets for faces, edges and vertices from graph */ 1819 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1820 /* free unneeded index sets */ 1821 if (!pcbddc->use_vertices) { 1822 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1823 } 1824 if (!pcbddc->use_edges) { 1825 for (i=0;i<n_ISForEdges;i++) { 1826 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1827 } 1828 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1829 n_ISForEdges = 0; 1830 } 1831 if (!pcbddc->use_faces) { 1832 for (i=0;i<n_ISForFaces;i++) { 1833 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1834 } 1835 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1836 n_ISForFaces = 0; 1837 } 1838 /* HACKS (the following two blocks of code) */ 1839 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1840 pcbddc->use_change_of_basis = PETSC_TRUE; 1841 if (!ISForEdges) { 1842 pcbddc->use_change_on_faces = PETSC_TRUE; 1843 } 1844 } 1845 if (pcbddc->NullSpace) { 1846 /* use_change_of_basis should be consistent among processors */ 1847 PetscBool tbool[2],gbool[2]; 1848 tbool[0] = pcbddc->use_change_of_basis; 1849 tbool[1] = pcbddc->use_change_on_faces; 1850 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1851 pcbddc->use_change_of_basis = gbool[0]; 1852 pcbddc->use_change_on_faces = gbool[1]; 1853 } 1854 1855 /* check if near null space is attached to global mat */ 1856 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1857 if (nearnullsp) { 1858 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1859 /* remove any stored info */ 1860 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1861 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1862 /* store information for BDDC solver reuse */ 1863 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1864 pcbddc->onearnullspace = nearnullsp; 1865 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1866 for (i=0;i<nnsp_size;i++) { 1867 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1868 } 1869 } else { /* if near null space is not provided BDDC uses constants by default */ 1870 nnsp_size = 0; 1871 nnsp_has_cnst = PETSC_TRUE; 1872 } 1873 /* get max number of constraints on a single cc */ 1874 max_constraints = nnsp_size; 1875 if (nnsp_has_cnst) max_constraints++; 1876 1877 /* 1878 Evaluate maximum storage size needed by the procedure 1879 - temp_indices will contain start index of each constraint stored as follows 1880 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1881 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1882 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1883 */ 1884 total_counts = n_ISForFaces+n_ISForEdges; 1885 total_counts *= max_constraints; 1886 n_vertices = 0; 1887 if (ISForVertices) { 1888 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1889 } 1890 total_counts += n_vertices; 1891 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1892 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1893 total_counts = 0; 1894 max_size_of_constraint = 0; 1895 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1896 if (i<n_ISForEdges) { 1897 used_IS = &ISForEdges[i]; 1898 } else { 1899 used_IS = &ISForFaces[i-n_ISForEdges]; 1900 } 1901 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1902 total_counts += j; 1903 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1904 } 1905 total_counts *= max_constraints; 1906 total_counts += n_vertices; 1907 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1908 /* get local part of global near null space vectors */ 1909 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1910 for (k=0;k<nnsp_size;k++) { 1911 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1912 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1914 } 1915 1916 /* whether or not to skip lapack calls */ 1917 skip_lapack = PETSC_TRUE; 1918 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1919 1920 /* allocate some auxiliary stuff */ 1921 if (!skip_lapack || pcbddc->use_qr_single) { 1922 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1923 } else { 1924 gidxs = NULL; 1925 permutation = NULL; 1926 temp_indices_to_constraint_work = NULL; 1927 temp_quadrature_constraint_work = NULL; 1928 } 1929 1930 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1931 if (!skip_lapack) { 1932 PetscScalar temp_work; 1933 1934 #if defined(PETSC_MISSING_LAPACK_GESVD) 1935 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1936 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1937 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1938 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1939 #if defined(PETSC_USE_COMPLEX) 1940 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1941 #endif 1942 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1943 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1944 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1945 lwork = -1; 1946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1947 #if !defined(PETSC_USE_COMPLEX) 1948 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1949 #else 1950 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1951 #endif 1952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1953 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1954 #else /* on missing GESVD */ 1955 /* SVD */ 1956 PetscInt max_n,min_n; 1957 max_n = max_size_of_constraint; 1958 min_n = max_constraints; 1959 if (max_size_of_constraint < max_constraints) { 1960 min_n = max_size_of_constraint; 1961 max_n = max_constraints; 1962 } 1963 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1964 #if defined(PETSC_USE_COMPLEX) 1965 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1966 #endif 1967 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1968 lwork = -1; 1969 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1970 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1972 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1973 #if !defined(PETSC_USE_COMPLEX) 1974 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1975 #else 1976 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1977 #endif 1978 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1979 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1980 #endif /* on missing GESVD */ 1981 /* Allocate optimal workspace */ 1982 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1983 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1984 } 1985 /* Now we can loop on constraining sets */ 1986 total_counts = 0; 1987 temp_indices[0] = 0; 1988 /* vertices */ 1989 if (ISForVertices) { 1990 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1991 if (nnsp_has_cnst) { /* consider all vertices */ 1992 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1993 for (i=0;i<n_vertices;i++) { 1994 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1995 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1996 total_counts++; 1997 } 1998 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1999 PetscBool used_vertex; 2000 for (i=0;i<n_vertices;i++) { 2001 used_vertex = PETSC_FALSE; 2002 k = 0; 2003 while (!used_vertex && k<nnsp_size) { 2004 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2005 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2006 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2007 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2008 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2009 total_counts++; 2010 used_vertex = PETSC_TRUE; 2011 } 2012 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2013 k++; 2014 } 2015 } 2016 } 2017 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2018 n_vertices = total_counts; 2019 } 2020 2021 /* edges and faces */ 2022 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2023 if (ncc<n_ISForEdges) { 2024 used_IS = &ISForEdges[ncc]; 2025 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2026 } else { 2027 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2028 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2029 } 2030 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2031 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2032 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2033 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2034 /* change of basis should not be performed on local periodic nodes */ 2035 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2036 if (nnsp_has_cnst) { 2037 PetscScalar quad_value; 2038 temp_constraints++; 2039 if (!pcbddc->use_nnsp_true) { 2040 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2041 } else { 2042 quad_value = 1.0; 2043 } 2044 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2045 for (j=0;j<size_of_constraint;j++) { 2046 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2047 } 2048 /* sort by global ordering if using lapack subroutines (not needed!) */ 2049 if (!skip_lapack || pcbddc->use_qr_single) { 2050 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2051 for (j=0;j<size_of_constraint;j++) { 2052 permutation[j]=j; 2053 } 2054 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2055 for (j=0;j<size_of_constraint;j++) { 2056 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2057 } 2058 for (j=0;j<size_of_constraint;j++) { 2059 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2060 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2061 } 2062 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2063 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2064 } 2065 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2066 total_counts++; 2067 } 2068 for (k=0;k<nnsp_size;k++) { 2069 PetscReal real_value; 2070 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2071 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2072 for (j=0;j<size_of_constraint;j++) { 2073 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2074 } 2075 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2076 /* check if array is null on the connected component */ 2077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2078 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2079 if (real_value > 0.0) { /* keep indices and values */ 2080 /* sort by global ordering if using lapack subroutines */ 2081 if (!skip_lapack || pcbddc->use_qr_single) { 2082 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2083 for (j=0;j<size_of_constraint;j++) { 2084 permutation[j]=j; 2085 } 2086 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2087 for (j=0;j<size_of_constraint;j++) { 2088 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2089 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2090 } 2091 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2092 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2093 } 2094 temp_constraints++; 2095 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2096 total_counts++; 2097 } 2098 } 2099 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2100 valid_constraints = temp_constraints; 2101 if (!pcbddc->use_nnsp_true && temp_constraints) { 2102 if (temp_constraints == 1) { /* just normalize the constraint */ 2103 PetscScalar norm; 2104 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2105 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2106 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2107 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2108 } else { /* perform SVD */ 2109 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2110 2111 #if defined(PETSC_MISSING_LAPACK_GESVD) 2112 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2113 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2114 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2115 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2116 from that computed using LAPACKgesvd 2117 -> This is due to a different computation of eigenvectors in LAPACKheev 2118 -> The quality of the POD-computed basis will be the same */ 2119 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2120 /* Store upper triangular part of correlation matrix */ 2121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2122 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2123 for (j=0;j<temp_constraints;j++) { 2124 for (k=0;k<j+1;k++) { 2125 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2126 } 2127 } 2128 /* compute eigenvalues and eigenvectors of correlation matrix */ 2129 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2130 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2131 #if !defined(PETSC_USE_COMPLEX) 2132 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2133 #else 2134 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2135 #endif 2136 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2137 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2138 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2139 j = 0; 2140 while (j < temp_constraints && singular_vals[j] < tol) j++; 2141 total_counts = total_counts-j; 2142 valid_constraints = temp_constraints-j; 2143 /* scale and copy POD basis into used quadrature memory */ 2144 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2150 if (j<temp_constraints) { 2151 PetscInt ii; 2152 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2153 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2154 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2155 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2156 for (k=0;k<temp_constraints-j;k++) { 2157 for (ii=0;ii<size_of_constraint;ii++) { 2158 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2159 } 2160 } 2161 } 2162 #else /* on missing GESVD */ 2163 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2166 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2167 #if !defined(PETSC_USE_COMPLEX) 2168 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2169 #else 2170 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2171 #endif 2172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2174 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2175 k = temp_constraints; 2176 if (k > size_of_constraint) k = size_of_constraint; 2177 j = 0; 2178 while (j < k && singular_vals[k-j-1] < tol) j++; 2179 valid_constraints = k-j; 2180 total_counts = total_counts-temp_constraints+valid_constraints; 2181 #endif /* on missing GESVD */ 2182 } 2183 } 2184 /* setting change_of_basis flag is safe now */ 2185 if (boolforchange) { 2186 for (j=0;j<valid_constraints;j++) { 2187 PetscBTSet(change_basis,total_counts-j-1); 2188 } 2189 } 2190 } 2191 /* free workspace */ 2192 if (!skip_lapack || pcbddc->use_qr_single) { 2193 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2194 } 2195 if (!skip_lapack) { 2196 ierr = PetscFree(work);CHKERRQ(ierr); 2197 #if defined(PETSC_USE_COMPLEX) 2198 ierr = PetscFree(rwork);CHKERRQ(ierr); 2199 #endif 2200 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2201 #if defined(PETSC_MISSING_LAPACK_GESVD) 2202 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2203 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2204 #endif 2205 } 2206 for (k=0;k<nnsp_size;k++) { 2207 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2208 } 2209 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2210 /* free index sets of faces, edges and vertices */ 2211 for (i=0;i<n_ISForFaces;i++) { 2212 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2213 } 2214 if (n_ISForFaces) { 2215 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2216 } 2217 for (i=0;i<n_ISForEdges;i++) { 2218 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2219 } 2220 if (n_ISForEdges) { 2221 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2222 } 2223 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2224 } else { 2225 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2226 PetscInt cum = 0; 2227 2228 total_counts = 0; 2229 n_vertices = 0; 2230 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2231 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2232 } 2233 max_constraints = 0; 2234 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2235 total_counts += pcbddc->adaptive_constraints_n[i]; 2236 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2237 } 2238 temp_indices = pcbddc->adaptive_constraints_ptrs; 2239 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2240 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2241 2242 #if 0 2243 printf("Found %d totals\n",total_counts); 2244 for (i=0;i<total_counts;i++) { 2245 printf("const %d, start %d",i,temp_indices[i]); 2246 printf(" end %d:\n",temp_indices[i+1]); 2247 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2248 printf(" idxs %d",temp_indices_to_constraint[j]); 2249 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2250 } 2251 } 2252 for (i=0;i<n_vertices;i++) { 2253 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2254 } 2255 for (i=0;i<sub_schurs->n_subs;i++) { 2256 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2257 } 2258 #endif 2259 2260 max_size_of_constraint = 0; 2261 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2262 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2263 /* Change of basis */ 2264 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2265 if (pcbddc->use_change_of_basis) { 2266 cum = n_vertices; 2267 for (i=0;i<sub_schurs->n_subs;i++) { 2268 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2269 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2270 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2271 } 2272 } 2273 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2274 } 2275 } 2276 } 2277 2278 /* map temp_indices_to_constraint in boundary numbering */ 2279 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2280 if (i != temp_indices[total_counts]) { 2281 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2282 } 2283 2284 /* set quantities in pcbddc data structure and store previous primal size */ 2285 /* n_vertices defines the number of subdomain corners in the primal space */ 2286 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2287 olocal_primal_size = pcbddc->local_primal_size; 2288 pcbddc->local_primal_size = total_counts; 2289 pcbddc->n_vertices = n_vertices; 2290 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2291 2292 /* Create constraint matrix */ 2293 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2294 /* so we need to set it up properly either with or without change of basis */ 2295 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2296 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2297 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2298 /* array to compute a local numbering of constraints : vertices first then constraints */ 2299 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2300 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2301 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2302 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2303 /* auxiliary stuff for basis change */ 2304 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2305 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2306 2307 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2308 total_primal_vertices=0; 2309 for (i=0;i<pcbddc->local_primal_size;i++) { 2310 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2311 if (size_of_constraint == 1) { 2312 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2313 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2314 aux_primal_minloc[total_primal_vertices]=0; 2315 total_primal_vertices++; 2316 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2317 PetscInt min_loc,min_index; 2318 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2319 /* find first untouched local node */ 2320 k = 0; 2321 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2322 min_index = global_indices[k]; 2323 min_loc = k; 2324 /* search the minimum among global nodes already untouched on the cc */ 2325 for (k=1;k<size_of_constraint;k++) { 2326 /* there can be more than one constraint on a single connected component */ 2327 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2328 min_index = global_indices[k]; 2329 min_loc = k; 2330 } 2331 } 2332 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2333 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2334 aux_primal_minloc[total_primal_vertices]=min_loc; 2335 total_primal_vertices++; 2336 } 2337 } 2338 /* determine if a QR strategy is needed for change of basis */ 2339 qr_needed = PETSC_FALSE; 2340 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2341 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2342 if (PetscBTLookup(change_basis,i)) { 2343 if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) { 2344 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2345 j = 0; 2346 for (k=0;k<size_of_constraint;k++) { 2347 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2348 j++; 2349 } 2350 } 2351 /* found more than one primal dof on the cc */ 2352 if (j > 1) { 2353 PetscBTSet(qr_needed_idx,i); 2354 qr_needed = PETSC_TRUE; 2355 } 2356 } else { 2357 PetscBTSet(qr_needed_idx,i); 2358 qr_needed = PETSC_TRUE; 2359 } 2360 } 2361 } 2362 /* free workspace */ 2363 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2364 2365 /* permute indices in order to have a sorted set of vertices */ 2366 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2367 2368 /* nonzero structure of constraint matrix */ 2369 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2370 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2371 j=total_primal_vertices; 2372 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2373 if (!PetscBTLookup(change_basis,i)) { 2374 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2375 j++; 2376 } 2377 } 2378 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2379 ierr = PetscFree(nnz);CHKERRQ(ierr); 2380 /* set values in constraint matrix */ 2381 for (i=0;i<total_primal_vertices;i++) { 2382 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2383 } 2384 total_counts = total_primal_vertices; 2385 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2386 if (!PetscBTLookup(change_basis,i)) { 2387 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2388 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2389 total_counts++; 2390 } 2391 } 2392 /* assembling */ 2393 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2395 /* 2396 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2397 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2398 */ 2399 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2400 if (pcbddc->use_change_of_basis) { 2401 /* dual and primal dofs on a single cc */ 2402 PetscInt dual_dofs,primal_dofs; 2403 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2404 PetscInt primal_counter; 2405 /* working stuff for GEQRF */ 2406 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2407 PetscBLASInt lqr_work; 2408 /* working stuff for UNGQR */ 2409 PetscScalar *gqr_work,lgqr_work_t; 2410 PetscBLASInt lgqr_work; 2411 /* working stuff for TRTRS */ 2412 PetscScalar *trs_rhs; 2413 PetscBLASInt Blas_NRHS; 2414 /* pointers for values insertion into change of basis matrix */ 2415 PetscInt *start_rows,*start_cols; 2416 PetscScalar *start_vals; 2417 /* working stuff for values insertion */ 2418 PetscBT is_primal; 2419 /* matrix sizes */ 2420 PetscInt global_size,local_size; 2421 /* temporary change of basis */ 2422 Mat localChangeOfBasisMatrix; 2423 /* extra space for debugging */ 2424 PetscScalar *dbg_work; 2425 2426 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2427 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2428 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2429 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2430 /* nonzeros for local mat */ 2431 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2432 for (i=0;i<pcis->n;i++) nnz[i]=1; 2433 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2434 if (PetscBTLookup(change_basis,i)) { 2435 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2436 if (PetscBTLookup(qr_needed_idx,i)) { 2437 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2438 } else { 2439 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2440 /* get local primal index on the cc */ 2441 j = 0; 2442 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2443 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2444 } 2445 } 2446 } 2447 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2448 ierr = PetscFree(nnz);CHKERRQ(ierr); 2449 /* Set initial identity in the matrix */ 2450 for (i=0;i<pcis->n;i++) { 2451 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2452 } 2453 2454 if (pcbddc->dbg_flag) { 2455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2456 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2457 } 2458 2459 2460 /* Now we loop on the constraints which need a change of basis */ 2461 /* 2462 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2463 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2464 2465 Basic blocks of change of basis matrix T computed by 2466 2467 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2468 2469 | 1 0 ... 0 s_1/S | 2470 | 0 1 ... 0 s_2/S | 2471 | ... | 2472 | 0 ... 1 s_{n-1}/S | 2473 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2474 2475 with S = \sum_{i=1}^n s_i^2 2476 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2477 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2478 2479 - QR decomposition of constraints otherwise 2480 */ 2481 if (qr_needed) { 2482 /* space to store Q */ 2483 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2484 /* first we issue queries for optimal work */ 2485 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2486 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2487 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2488 lqr_work = -1; 2489 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2490 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2491 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2492 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2493 lgqr_work = -1; 2494 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2495 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2496 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2497 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2498 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2499 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2500 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2501 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2502 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2503 /* array to store scaling factors for reflectors */ 2504 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2505 /* array to store rhs and solution of triangular solver */ 2506 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2507 /* allocating workspace for check */ 2508 if (pcbddc->dbg_flag) { 2509 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2510 } 2511 } 2512 /* array to store whether a node is primal or not */ 2513 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2514 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2515 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2516 if (i != total_primal_vertices) { 2517 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2518 } 2519 for (i=0;i<total_primal_vertices;i++) { 2520 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2521 } 2522 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2523 2524 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2525 /* -> using implicit ordering contained in temp_indices data */ 2526 total_counts = pcbddc->n_vertices; 2527 primal_counter = total_counts; 2528 while (total_counts<pcbddc->local_primal_size) { 2529 primal_dofs = 1; 2530 if (PetscBTLookup(change_basis,total_counts)) { 2531 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2532 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2533 primal_dofs++; 2534 } 2535 /* get constraint info */ 2536 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2537 dual_dofs = size_of_constraint-primal_dofs; 2538 2539 if (pcbddc->dbg_flag) { 2540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2541 } 2542 2543 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2544 2545 /* copy quadrature constraints for change of basis check */ 2546 if (pcbddc->dbg_flag) { 2547 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2548 } 2549 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2550 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2551 2552 /* compute QR decomposition of constraints */ 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2556 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2557 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2560 2561 /* explictly compute R^-T */ 2562 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2563 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2564 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2565 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2569 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2570 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2571 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2572 2573 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2576 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2578 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2579 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2580 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2582 2583 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2584 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2585 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2586 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2587 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2588 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2589 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2590 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2592 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2593 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2594 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2595 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2596 2597 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2598 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2599 /* insert cols for primal dofs */ 2600 for (j=0;j<primal_dofs;j++) { 2601 start_vals = &qr_basis[j*size_of_constraint]; 2602 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2603 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2604 } 2605 /* insert cols for dual dofs */ 2606 for (j=0,k=0;j<dual_dofs;k++) { 2607 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2608 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2609 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2610 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2611 j++; 2612 } 2613 } 2614 2615 /* check change of basis */ 2616 if (pcbddc->dbg_flag) { 2617 PetscInt ii,jj; 2618 PetscBool valid_qr=PETSC_TRUE; 2619 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2620 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2623 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2624 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2626 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2628 for (jj=0;jj<size_of_constraint;jj++) { 2629 for (ii=0;ii<primal_dofs;ii++) { 2630 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2631 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2632 } 2633 } 2634 if (!valid_qr) { 2635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2636 for (jj=0;jj<size_of_constraint;jj++) { 2637 for (ii=0;ii<primal_dofs;ii++) { 2638 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2639 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2640 } 2641 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2642 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2643 } 2644 } 2645 } 2646 } else { 2647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2648 } 2649 } 2650 } else { /* simple transformation block */ 2651 PetscInt row,col; 2652 PetscScalar val,norm; 2653 2654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2655 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2656 for (j=0;j<size_of_constraint;j++) { 2657 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2658 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2659 if (!PetscBTLookup(is_primal,row_B)) { 2660 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2661 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2662 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2663 } else { 2664 for (k=0;k<size_of_constraint;k++) { 2665 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2666 if (row != col) { 2667 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2668 } else { 2669 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2670 } 2671 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2672 } 2673 } 2674 } 2675 if (pcbddc->dbg_flag) { 2676 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2677 } 2678 } 2679 /* increment primal counter */ 2680 primal_counter += primal_dofs; 2681 } else { 2682 if (pcbddc->dbg_flag) { 2683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2684 } 2685 } 2686 /* increment constraint counter total_counts */ 2687 total_counts += primal_dofs; 2688 } 2689 2690 /* free workspace */ 2691 if (qr_needed) { 2692 if (pcbddc->dbg_flag) { 2693 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2694 } 2695 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2696 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2697 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2698 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2699 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2700 } 2701 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2702 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2703 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2704 2705 /* assembling of global change of variable */ 2706 { 2707 Mat tmat; 2708 PetscInt bs; 2709 2710 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2711 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2712 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2713 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2714 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2715 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2716 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2717 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2718 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2719 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2720 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2721 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2722 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2723 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2724 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2725 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2726 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2727 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2728 } 2729 /* check */ 2730 if (pcbddc->dbg_flag) { 2731 PetscReal error; 2732 Vec x,x_change; 2733 2734 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2735 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2736 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2737 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2738 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2740 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2741 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2742 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2743 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2744 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2745 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2748 ierr = VecDestroy(&x);CHKERRQ(ierr); 2749 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2750 } 2751 2752 /* adapt sub_schurs computed (if any) */ 2753 if (pcbddc->use_deluxe_scaling) { 2754 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2755 if (sub_schurs->S_Ej_all) { 2756 Mat S_new,tmat; 2757 IS is_all_N; 2758 2759 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2760 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2761 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2762 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2763 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2764 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2765 sub_schurs->S_Ej_all = S_new; 2766 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2767 if (sub_schurs->sum_S_Ej_all) { 2768 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2769 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2770 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2771 sub_schurs->sum_S_Ej_all = S_new; 2772 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2773 } 2774 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2775 } 2776 } 2777 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2778 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2779 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2780 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2781 } 2782 2783 /* set up change of basis context */ 2784 if (pcbddc->ChangeOfBasisMatrix) { 2785 PCBDDCChange_ctx change_ctx; 2786 2787 if (!pcbddc->new_global_mat) { 2788 PetscInt global_size,local_size; 2789 2790 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2791 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2792 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2793 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2794 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2795 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2796 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2797 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2798 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2799 } else { 2800 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2801 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2802 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2803 } 2804 if (!pcbddc->user_ChangeOfBasisMatrix) { 2805 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2806 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2807 } else { 2808 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2809 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2810 } 2811 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2812 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2813 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 } 2816 2817 /* get indices in local ordering for vertices and constraints */ 2818 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2819 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2820 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2821 } 2822 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2823 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2824 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2825 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2826 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2827 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2828 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2829 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2830 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2831 /* set quantities in PCBDDC data struct */ 2832 pcbddc->n_actual_vertices = i; 2833 /* check if a new primal space has been introduced */ 2834 pcbddc->new_primal_space_local = PETSC_TRUE; 2835 if (olocal_primal_size == pcbddc->local_primal_size) { 2836 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2837 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2838 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2839 } 2840 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2841 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2842 2843 /* flush dbg viewer */ 2844 if (pcbddc->dbg_flag) { 2845 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2846 } 2847 2848 /* free workspace */ 2849 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2850 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2851 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2852 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2853 if (!pcbddc->adaptive_selection) { 2854 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2855 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2856 } else { 2857 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2858 pcbddc->adaptive_constraints_ptrs, 2859 pcbddc->adaptive_constraints_idxs, 2860 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2861 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2862 } 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2868 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2869 { 2870 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2871 PC_IS *pcis = (PC_IS*)pc->data; 2872 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2873 PetscInt ierr,i,vertex_size; 2874 PetscViewer viewer=pcbddc->dbg_viewer; 2875 2876 PetscFunctionBegin; 2877 /* Reset previously computed graph */ 2878 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2879 /* Init local Graph struct */ 2880 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2881 2882 /* Check validity of the csr graph passed in by the user */ 2883 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2884 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2885 } 2886 2887 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2888 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2889 PetscInt *xadj,*adjncy; 2890 PetscInt nvtxs; 2891 PetscBool flg_row=PETSC_FALSE; 2892 2893 if (pcbddc->use_local_adj) { 2894 2895 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2896 if (flg_row) { 2897 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2898 pcbddc->computed_rowadj = PETSC_TRUE; 2899 } 2900 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2901 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2902 IS is_dummy; 2903 ISLocalToGlobalMapping l2gmap_dummy; 2904 PetscInt j,sum; 2905 PetscInt *cxadj,*cadjncy; 2906 const PetscInt *idxs; 2907 PCBDDCGraph graph; 2908 PetscBT is_on_boundary; 2909 2910 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2911 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2912 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2913 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2914 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2915 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2916 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2917 if (flg_row) { 2918 graph->xadj = xadj; 2919 graph->adjncy = adjncy; 2920 } 2921 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2922 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2923 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2924 2925 if (pcbddc->dbg_flag) { 2926 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2927 for (i=0;i<graph->ncc;i++) { 2928 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2929 } 2930 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2931 } 2932 2933 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2934 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2935 for (i=0;i<pcis->n_B;i++) { 2936 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2937 } 2938 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2939 2940 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2941 sum = 0; 2942 for (i=0;i<graph->ncc;i++) { 2943 PetscInt sizecc = 0; 2944 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2945 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2946 sizecc++; 2947 } 2948 } 2949 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2950 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2951 cxadj[graph->queue[j]] = sizecc; 2952 } 2953 } 2954 sum += sizecc*sizecc; 2955 } 2956 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2957 sum = 0; 2958 for (i=0;i<pcis->n;i++) { 2959 PetscInt temp = cxadj[i]; 2960 cxadj[i] = sum; 2961 sum += temp; 2962 } 2963 cxadj[pcis->n] = sum; 2964 for (i=0;i<graph->ncc;i++) { 2965 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2966 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2967 PetscInt k,sizecc = 0; 2968 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2969 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2970 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2971 sizecc++; 2972 } 2973 } 2974 } 2975 } 2976 } 2977 if (pcis->n) { 2978 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2979 } else { 2980 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2981 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2982 } 2983 graph->xadj = 0; 2984 graph->adjncy = 0; 2985 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2986 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2987 } 2988 } 2989 2990 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2991 vertex_size = 1; 2992 if (pcbddc->user_provided_isfordofs) { 2993 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2994 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2995 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2996 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2997 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2998 } 2999 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3000 pcbddc->n_ISForDofs = 0; 3001 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3002 } 3003 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3004 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3005 } else { 3006 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3007 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3008 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3009 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3010 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3011 } 3012 } 3013 } 3014 3015 /* Setup of Graph */ 3016 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3017 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3018 } 3019 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3020 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3021 } 3022 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3023 3024 /* Graph's connected components analysis */ 3025 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3026 3027 /* print some info to stdout */ 3028 if (pcbddc->dbg_flag) { 3029 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3030 } 3031 3032 /* mark topography has done */ 3033 pcbddc->recompute_topography = PETSC_FALSE; 3034 PetscFunctionReturn(0); 3035 } 3036 3037 #undef __FUNCT__ 3038 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3039 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3040 { 3041 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3042 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3043 PetscErrorCode ierr; 3044 3045 PetscFunctionBegin; 3046 n = 0; 3047 vertices = 0; 3048 if (pcbddc->ConstraintMatrix) { 3049 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3050 for (i=0;i<local_primal_size;i++) { 3051 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3052 if (size_of_constraint == 1) n++; 3053 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3054 } 3055 if (vertices_idx) { 3056 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3057 n = 0; 3058 for (i=0;i<local_primal_size;i++) { 3059 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3060 if (size_of_constraint == 1) { 3061 vertices[n++]=row_cmat_indices[0]; 3062 } 3063 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3064 } 3065 } 3066 } 3067 *n_vertices = n; 3068 if (vertices_idx) *vertices_idx = vertices; 3069 PetscFunctionReturn(0); 3070 } 3071 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3074 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3075 { 3076 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3077 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3078 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3079 PetscBT touched; 3080 PetscErrorCode ierr; 3081 3082 /* This function assumes that the number of local constraints per connected component 3083 is not greater than the number of nodes defined for the connected component 3084 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3085 PetscFunctionBegin; 3086 n = 0; 3087 constraints_index = 0; 3088 if (pcbddc->ConstraintMatrix) { 3089 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3090 max_size_of_constraint = 0; 3091 for (i=0;i<local_primal_size;i++) { 3092 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3093 if (size_of_constraint > 1) { 3094 n++; 3095 } 3096 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3097 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3098 } 3099 if (constraints_idx) { 3100 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3101 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3102 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3103 n = 0; 3104 for (i=0;i<local_primal_size;i++) { 3105 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3106 if (size_of_constraint > 1) { 3107 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3108 /* find first untouched local node */ 3109 j = 0; 3110 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3111 min_index = row_cmat_global_indices[j]; 3112 min_loc = j; 3113 /* search the minimum among nodes not yet touched on the connected component 3114 since there can be more than one constraint on a single cc */ 3115 for (j=1;j<size_of_constraint;j++) { 3116 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3117 min_index = row_cmat_global_indices[j]; 3118 min_loc = j; 3119 } 3120 } 3121 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3122 constraints_index[n++] = row_cmat_indices[min_loc]; 3123 } 3124 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3125 } 3126 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3127 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3128 } 3129 } 3130 *n_constraints = n; 3131 if (constraints_idx) *constraints_idx = constraints_index; 3132 PetscFunctionReturn(0); 3133 } 3134 3135 #undef __FUNCT__ 3136 #define __FUNCT__ "PCBDDCSubsetNumbering" 3137 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3138 { 3139 Vec local_vec,global_vec; 3140 IS seqis,paris; 3141 VecScatter scatter_ctx; 3142 PetscScalar *array; 3143 PetscInt *temp_global_dofs; 3144 PetscScalar globalsum; 3145 PetscInt i,j,s; 3146 PetscInt nlocals,first_index,old_index,max_local,max_global; 3147 PetscMPIInt rank_prec_comm,size_prec_comm; 3148 PetscInt *dof_sizes,*dof_displs; 3149 PetscBool first_found; 3150 PetscErrorCode ierr; 3151 3152 PetscFunctionBegin; 3153 /* mpi buffers */ 3154 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3155 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3156 j = ( !rank_prec_comm ? size_prec_comm : 0); 3157 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3158 /* get maximum size of subset */ 3159 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3160 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3161 max_local = 0; 3162 for (i=0;i<n_local_dofs;i++) { 3163 if (max_local < temp_global_dofs[i] ) { 3164 max_local = temp_global_dofs[i]; 3165 } 3166 } 3167 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3168 max_global++; 3169 max_local = 0; 3170 for (i=0;i<n_local_dofs;i++) { 3171 if (max_local < local_dofs[i] ) { 3172 max_local = local_dofs[i]; 3173 } 3174 } 3175 max_local++; 3176 /* allocate workspace */ 3177 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3178 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3179 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3180 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3181 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3182 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3183 /* create scatter */ 3184 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3185 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3186 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3187 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3188 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3189 /* init array */ 3190 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3191 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3192 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3193 if (local_dofs_mult) { 3194 for (i=0;i<n_local_dofs;i++) { 3195 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3196 } 3197 } else { 3198 for (i=0;i<n_local_dofs;i++) { 3199 array[local_dofs[i]]=1.0; 3200 } 3201 } 3202 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3203 /* scatter into global vec and get total number of global dofs */ 3204 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3205 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3206 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3207 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3208 /* Fill global_vec with cumulative function for global numbering */ 3209 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3210 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3211 nlocals = 0; 3212 first_index = -1; 3213 first_found = PETSC_FALSE; 3214 for (i=0;i<s;i++) { 3215 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3216 first_found = PETSC_TRUE; 3217 first_index = i; 3218 } 3219 nlocals += (PetscInt)PetscRealPart(array[i]); 3220 } 3221 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3222 if (!rank_prec_comm) { 3223 dof_displs[0]=0; 3224 for (i=1;i<size_prec_comm;i++) { 3225 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3226 } 3227 } 3228 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3229 if (first_found) { 3230 array[first_index] += (PetscScalar)nlocals; 3231 old_index = first_index; 3232 for (i=first_index+1;i<s;i++) { 3233 if (PetscRealPart(array[i]) > 0.1) { 3234 array[i] += array[old_index]; 3235 old_index = i; 3236 } 3237 } 3238 } 3239 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3240 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3241 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3242 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3243 /* get global ordering of local dofs */ 3244 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3245 if (local_dofs_mult) { 3246 for (i=0;i<n_local_dofs;i++) { 3247 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3248 } 3249 } else { 3250 for (i=0;i<n_local_dofs;i++) { 3251 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3252 } 3253 } 3254 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3255 /* free workspace */ 3256 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3257 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3258 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3259 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3260 /* return pointer to global ordering of local dofs */ 3261 *global_numbering_subset = temp_global_dofs; 3262 PetscFunctionReturn(0); 3263 } 3264 3265 #undef __FUNCT__ 3266 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3267 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3268 { 3269 PetscInt i,j; 3270 PetscScalar *alphas; 3271 PetscErrorCode ierr; 3272 3273 PetscFunctionBegin; 3274 /* this implements stabilized Gram-Schmidt */ 3275 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3276 for (i=0;i<n;i++) { 3277 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3278 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3279 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3280 } 3281 ierr = PetscFree(alphas);CHKERRQ(ierr); 3282 PetscFunctionReturn(0); 3283 } 3284 3285 #undef __FUNCT__ 3286 #define __FUNCT__ "MatISGetSubassemblingPattern" 3287 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3288 { 3289 Mat subdomain_adj; 3290 IS new_ranks,ranks_send_to; 3291 MatPartitioning partitioner; 3292 Mat_IS *matis; 3293 PetscInt n_neighs,*neighs,*n_shared,**shared; 3294 PetscInt prank; 3295 PetscMPIInt size,rank,color; 3296 PetscInt *xadj,*adjncy,*oldranks; 3297 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3298 PetscInt i,local_size,threshold=0; 3299 PetscErrorCode ierr; 3300 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3301 PetscSubcomm subcomm; 3302 3303 PetscFunctionBegin; 3304 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3305 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3306 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3307 3308 /* Get info on mapping */ 3309 matis = (Mat_IS*)(mat->data); 3310 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3311 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3312 3313 /* build local CSR graph of subdomains' connectivity */ 3314 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3315 xadj[0] = 0; 3316 xadj[1] = PetscMax(n_neighs-1,0); 3317 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3318 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3319 3320 if (threshold) { 3321 PetscInt xadj_count = 0; 3322 for (i=1;i<n_neighs;i++) { 3323 if (n_shared[i] > threshold) { 3324 adjncy[xadj_count] = neighs[i]; 3325 adjncy_wgt[xadj_count] = n_shared[i]; 3326 xadj_count++; 3327 } 3328 } 3329 xadj[1] = xadj_count; 3330 } else { 3331 if (xadj[1]) { 3332 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3333 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3334 } 3335 } 3336 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3337 if (use_square) { 3338 for (i=0;i<xadj[1];i++) { 3339 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3340 } 3341 } 3342 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3343 3344 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3345 3346 /* 3347 Restrict work on active processes only. 3348 */ 3349 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3350 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3351 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3352 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3353 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3354 if (color) { 3355 ierr = PetscFree(xadj);CHKERRQ(ierr); 3356 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3357 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3358 } else { 3359 PetscInt coarsening_ratio; 3360 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3361 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3362 prank = rank; 3363 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3364 /* 3365 for (i=0;i<size;i++) { 3366 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3367 } 3368 */ 3369 for (i=0;i<xadj[1];i++) { 3370 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3371 } 3372 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3373 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3374 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3375 3376 /* Partition */ 3377 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3378 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3379 if (use_vwgt) { 3380 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3381 v_wgt[0] = local_size; 3382 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3383 } 3384 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3385 coarsening_ratio = size/n_subdomains; 3386 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3387 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3388 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3389 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3390 3391 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3392 if (contiguous) { 3393 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3394 } else { 3395 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3396 } 3397 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3398 /* clean up */ 3399 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3400 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3401 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3402 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3403 } 3404 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3405 3406 /* assemble parallel IS for sends */ 3407 i = 1; 3408 if (color) i=0; 3409 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3410 3411 /* get back IS */ 3412 *is_sends = ranks_send_to; 3413 PetscFunctionReturn(0); 3414 } 3415 3416 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3417 3418 #undef __FUNCT__ 3419 #define __FUNCT__ "MatISSubassemble" 3420 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3421 { 3422 Mat local_mat; 3423 Mat_IS *matis; 3424 IS is_sends_internal; 3425 PetscInt rows,cols,new_local_rows; 3426 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3427 PetscBool ismatis,isdense,newisdense,destroy_mat; 3428 ISLocalToGlobalMapping l2gmap; 3429 PetscInt* l2gmap_indices; 3430 const PetscInt* is_indices; 3431 MatType new_local_type; 3432 /* buffers */ 3433 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3434 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3435 PetscInt *recv_buffer_idxs_local; 3436 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3437 /* MPI */ 3438 MPI_Comm comm,comm_n; 3439 PetscSubcomm subcomm; 3440 PetscMPIInt n_sends,n_recvs,commsize; 3441 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3442 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3443 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3444 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3445 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3446 PetscErrorCode ierr; 3447 3448 PetscFunctionBegin; 3449 /* TODO: add missing checks */ 3450 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3451 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3452 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3453 PetscValidLogicalCollectiveInt(mat,nis,7); 3454 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3455 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3456 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3457 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3458 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3459 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3460 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3461 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3462 PetscInt mrows,mcols,mnrows,mncols; 3463 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3464 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3465 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3466 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3467 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3468 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3469 } 3470 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3471 PetscValidLogicalCollectiveInt(mat,bs,0); 3472 /* prepare IS for sending if not provided */ 3473 if (!is_sends) { 3474 PetscBool pcontig = PETSC_TRUE; 3475 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3476 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3477 } else { 3478 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3479 is_sends_internal = is_sends; 3480 } 3481 3482 /* get pointer of MATIS data */ 3483 matis = (Mat_IS*)mat->data; 3484 3485 /* get comm */ 3486 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3487 3488 /* compute number of sends */ 3489 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3490 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3491 3492 /* compute number of receives */ 3493 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3494 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3495 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3496 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3497 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3498 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3499 ierr = PetscFree(iflags);CHKERRQ(ierr); 3500 3501 /* restrict comm if requested */ 3502 subcomm = 0; 3503 destroy_mat = PETSC_FALSE; 3504 if (restrict_comm) { 3505 PetscMPIInt color,subcommsize; 3506 3507 color = 0; 3508 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3509 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3510 subcommsize = commsize - subcommsize; 3511 /* check if reuse has been requested */ 3512 if (reuse == MAT_REUSE_MATRIX) { 3513 if (*mat_n) { 3514 PetscMPIInt subcommsize2; 3515 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3516 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3517 comm_n = PetscObjectComm((PetscObject)*mat_n); 3518 } else { 3519 comm_n = PETSC_COMM_SELF; 3520 } 3521 } else { /* MAT_INITIAL_MATRIX */ 3522 PetscMPIInt rank; 3523 3524 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3525 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3526 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3527 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3528 comm_n = PetscSubcommChild(subcomm); 3529 } 3530 /* flag to destroy *mat_n if not significative */ 3531 if (color) destroy_mat = PETSC_TRUE; 3532 } else { 3533 comm_n = comm; 3534 } 3535 3536 /* prepare send/receive buffers */ 3537 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3538 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3539 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3540 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3541 if (nis) { 3542 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3543 } 3544 3545 /* Get data from local matrices */ 3546 if (!isdense) { 3547 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3548 /* TODO: See below some guidelines on how to prepare the local buffers */ 3549 /* 3550 send_buffer_vals should contain the raw values of the local matrix 3551 send_buffer_idxs should contain: 3552 - MatType_PRIVATE type 3553 - PetscInt size_of_l2gmap 3554 - PetscInt global_row_indices[size_of_l2gmap] 3555 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3556 */ 3557 } else { 3558 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3559 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3560 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3561 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3562 send_buffer_idxs[1] = i; 3563 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3564 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3565 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3566 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3567 for (i=0;i<n_sends;i++) { 3568 ilengths_vals[is_indices[i]] = len*len; 3569 ilengths_idxs[is_indices[i]] = len+2; 3570 } 3571 } 3572 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3573 /* additional is (if any) */ 3574 if (nis) { 3575 PetscMPIInt psum; 3576 PetscInt j; 3577 for (j=0,psum=0;j<nis;j++) { 3578 PetscInt plen; 3579 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3580 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3581 psum += len+1; /* indices + lenght */ 3582 } 3583 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3584 for (j=0,psum=0;j<nis;j++) { 3585 PetscInt plen; 3586 const PetscInt *is_array_idxs; 3587 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3588 send_buffer_idxs_is[psum] = plen; 3589 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3590 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3591 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3592 psum += plen+1; /* indices + lenght */ 3593 } 3594 for (i=0;i<n_sends;i++) { 3595 ilengths_idxs_is[is_indices[i]] = psum; 3596 } 3597 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3598 } 3599 3600 buf_size_idxs = 0; 3601 buf_size_vals = 0; 3602 buf_size_idxs_is = 0; 3603 for (i=0;i<n_recvs;i++) { 3604 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3605 buf_size_vals += (PetscInt)olengths_vals[i]; 3606 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3607 } 3608 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3609 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3610 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3611 3612 /* get new tags for clean communications */ 3613 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3614 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3615 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3616 3617 /* allocate for requests */ 3618 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3619 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3620 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3621 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3622 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3623 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3624 3625 /* communications */ 3626 ptr_idxs = recv_buffer_idxs; 3627 ptr_vals = recv_buffer_vals; 3628 ptr_idxs_is = recv_buffer_idxs_is; 3629 for (i=0;i<n_recvs;i++) { 3630 source_dest = onodes[i]; 3631 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3632 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3633 ptr_idxs += olengths_idxs[i]; 3634 ptr_vals += olengths_vals[i]; 3635 if (nis) { 3636 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3637 ptr_idxs_is += olengths_idxs_is[i]; 3638 } 3639 } 3640 for (i=0;i<n_sends;i++) { 3641 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3642 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3643 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3644 if (nis) { 3645 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3646 } 3647 } 3648 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3649 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3650 3651 /* assemble new l2g map */ 3652 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3653 ptr_idxs = recv_buffer_idxs; 3654 new_local_rows = 0; 3655 for (i=0;i<n_recvs;i++) { 3656 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3657 ptr_idxs += olengths_idxs[i]; 3658 } 3659 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3660 ptr_idxs = recv_buffer_idxs; 3661 new_local_rows = 0; 3662 for (i=0;i<n_recvs;i++) { 3663 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3664 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3665 ptr_idxs += olengths_idxs[i]; 3666 } 3667 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3668 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3669 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3670 3671 /* infer new local matrix type from received local matrices type */ 3672 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3673 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3674 if (n_recvs) { 3675 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3676 ptr_idxs = recv_buffer_idxs; 3677 for (i=0;i<n_recvs;i++) { 3678 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3679 new_local_type_private = MATAIJ_PRIVATE; 3680 break; 3681 } 3682 ptr_idxs += olengths_idxs[i]; 3683 } 3684 switch (new_local_type_private) { 3685 case MATDENSE_PRIVATE: 3686 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3687 new_local_type = MATSEQAIJ; 3688 bs = 1; 3689 } else { /* if I receive only 1 dense matrix */ 3690 new_local_type = MATSEQDENSE; 3691 bs = 1; 3692 } 3693 break; 3694 case MATAIJ_PRIVATE: 3695 new_local_type = MATSEQAIJ; 3696 bs = 1; 3697 break; 3698 case MATBAIJ_PRIVATE: 3699 new_local_type = MATSEQBAIJ; 3700 break; 3701 case MATSBAIJ_PRIVATE: 3702 new_local_type = MATSEQSBAIJ; 3703 break; 3704 default: 3705 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3706 break; 3707 } 3708 } else { /* by default, new_local_type is seqdense */ 3709 new_local_type = MATSEQDENSE; 3710 bs = 1; 3711 } 3712 3713 /* create MATIS object if needed */ 3714 if (reuse == MAT_INITIAL_MATRIX) { 3715 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3716 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3717 } else { 3718 /* it also destroys the local matrices */ 3719 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3720 } 3721 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3722 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3723 3724 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3725 3726 /* Global to local map of received indices */ 3727 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3728 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3729 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3730 3731 /* restore attributes -> type of incoming data and its size */ 3732 buf_size_idxs = 0; 3733 for (i=0;i<n_recvs;i++) { 3734 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3735 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3736 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3737 } 3738 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3739 3740 /* set preallocation */ 3741 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3742 if (!newisdense) { 3743 PetscInt *new_local_nnz=0; 3744 3745 ptr_vals = recv_buffer_vals; 3746 ptr_idxs = recv_buffer_idxs_local; 3747 if (n_recvs) { 3748 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3749 } 3750 for (i=0;i<n_recvs;i++) { 3751 PetscInt j; 3752 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3753 for (j=0;j<*(ptr_idxs+1);j++) { 3754 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3755 } 3756 } else { 3757 /* TODO */ 3758 } 3759 ptr_idxs += olengths_idxs[i]; 3760 } 3761 if (new_local_nnz) { 3762 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3763 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3764 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3765 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3766 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3767 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3768 } else { 3769 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3770 } 3771 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3772 } else { 3773 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3774 } 3775 3776 /* set values */ 3777 ptr_vals = recv_buffer_vals; 3778 ptr_idxs = recv_buffer_idxs_local; 3779 for (i=0;i<n_recvs;i++) { 3780 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3781 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3782 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3783 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3784 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3785 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3786 } else { 3787 /* TODO */ 3788 } 3789 ptr_idxs += olengths_idxs[i]; 3790 ptr_vals += olengths_vals[i]; 3791 } 3792 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3793 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3796 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3797 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3798 3799 #if 0 3800 if (!restrict_comm) { /* check */ 3801 Vec lvec,rvec; 3802 PetscReal infty_error; 3803 3804 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3805 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3806 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3807 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3808 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3809 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3810 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3811 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3812 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3813 } 3814 #endif 3815 3816 /* assemble new additional is (if any) */ 3817 if (nis) { 3818 PetscInt **temp_idxs,*count_is,j,psum; 3819 3820 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3821 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3822 ptr_idxs = recv_buffer_idxs_is; 3823 psum = 0; 3824 for (i=0;i<n_recvs;i++) { 3825 for (j=0;j<nis;j++) { 3826 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3827 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3828 psum += plen; 3829 ptr_idxs += plen+1; /* shift pointer to received data */ 3830 } 3831 } 3832 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3833 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3834 for (i=1;i<nis;i++) { 3835 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3836 } 3837 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3838 ptr_idxs = recv_buffer_idxs_is; 3839 for (i=0;i<n_recvs;i++) { 3840 for (j=0;j<nis;j++) { 3841 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3842 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3843 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3844 ptr_idxs += plen+1; /* shift pointer to received data */ 3845 } 3846 } 3847 for (i=0;i<nis;i++) { 3848 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3849 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3850 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3851 } 3852 ierr = PetscFree(count_is);CHKERRQ(ierr); 3853 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3854 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3855 } 3856 /* free workspace */ 3857 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3858 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3859 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3860 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3861 if (isdense) { 3862 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3863 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3864 } else { 3865 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3866 } 3867 if (nis) { 3868 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3869 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3870 } 3871 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3872 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3873 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3874 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3875 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3876 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3877 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3878 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3879 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3880 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3881 ierr = PetscFree(onodes);CHKERRQ(ierr); 3882 if (nis) { 3883 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3884 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3885 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3886 } 3887 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3888 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3889 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3890 for (i=0;i<nis;i++) { 3891 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3892 } 3893 } 3894 PetscFunctionReturn(0); 3895 } 3896 3897 /* temporary hack into ksp private data structure */ 3898 #include <petsc/private/kspimpl.h> 3899 3900 #undef __FUNCT__ 3901 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3902 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3903 { 3904 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3905 PC_IS *pcis = (PC_IS*)pc->data; 3906 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3907 MatNullSpace CoarseNullSpace=NULL; 3908 ISLocalToGlobalMapping coarse_islg; 3909 IS coarse_is,*isarray; 3910 PetscInt i,im_active=-1,active_procs=-1; 3911 PetscInt nis,nisdofs,nisneu; 3912 PC pc_temp; 3913 PCType coarse_pc_type; 3914 KSPType coarse_ksp_type; 3915 PetscBool multilevel_requested,multilevel_allowed; 3916 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3917 Mat t_coarse_mat_is; 3918 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3919 PetscMPIInt all_procs; 3920 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3921 PetscBool compute_vecs = PETSC_FALSE; 3922 PetscScalar *array; 3923 PetscErrorCode ierr; 3924 3925 PetscFunctionBegin; 3926 /* Assign global numbering to coarse dofs */ 3927 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3928 PetscInt ocoarse_size; 3929 compute_vecs = PETSC_TRUE; 3930 ocoarse_size = pcbddc->coarse_size; 3931 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3932 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3933 /* see if we can avoid some work */ 3934 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3935 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3936 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3937 coarse_reuse = PETSC_FALSE; 3938 } else { /* we can safely reuse already computed coarse matrix */ 3939 coarse_reuse = PETSC_TRUE; 3940 } 3941 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3942 coarse_reuse = PETSC_FALSE; 3943 } 3944 /* reset any subassembling information */ 3945 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3946 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3947 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3948 coarse_reuse = PETSC_TRUE; 3949 } 3950 3951 /* count "active" (i.e. with positive local size) and "void" processes */ 3952 im_active = !!(pcis->n); 3953 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3954 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3955 void_procs = all_procs-active_procs; 3956 csin_type_simple = PETSC_TRUE; 3957 redist = PETSC_FALSE; 3958 if (pcbddc->current_level && void_procs) { 3959 csin_ml = PETSC_TRUE; 3960 ncoarse_ml = void_procs; 3961 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3962 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3963 csin_ds = PETSC_TRUE; 3964 ncoarse_ds = pcbddc->redistribute_coarse; 3965 redist = PETSC_TRUE; 3966 } else { 3967 csin_ds = PETSC_TRUE; 3968 ncoarse_ds = active_procs; 3969 redist = PETSC_TRUE; 3970 } 3971 } else { 3972 csin_ml = PETSC_FALSE; 3973 ncoarse_ml = all_procs; 3974 if (void_procs) { 3975 csin_ds = PETSC_TRUE; 3976 ncoarse_ds = void_procs; 3977 csin_type_simple = PETSC_FALSE; 3978 } else { 3979 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3980 csin_ds = PETSC_TRUE; 3981 ncoarse_ds = pcbddc->redistribute_coarse; 3982 redist = PETSC_TRUE; 3983 } else { 3984 csin_ds = PETSC_FALSE; 3985 ncoarse_ds = all_procs; 3986 } 3987 } 3988 } 3989 3990 /* 3991 test if we can go multilevel: three conditions must be satisfied: 3992 - we have not exceeded the number of levels requested 3993 - we can actually subassemble the active processes 3994 - we can find a suitable number of MPI processes where we can place the subassembled problem 3995 */ 3996 multilevel_allowed = PETSC_FALSE; 3997 multilevel_requested = PETSC_FALSE; 3998 if (pcbddc->current_level < pcbddc->max_levels) { 3999 multilevel_requested = PETSC_TRUE; 4000 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4001 multilevel_allowed = PETSC_FALSE; 4002 } else { 4003 multilevel_allowed = PETSC_TRUE; 4004 } 4005 } 4006 /* determine number of process partecipating to coarse solver */ 4007 if (multilevel_allowed) { 4008 ncoarse = ncoarse_ml; 4009 csin = csin_ml; 4010 redist = PETSC_FALSE; 4011 } else { 4012 ncoarse = ncoarse_ds; 4013 csin = csin_ds; 4014 } 4015 4016 /* creates temporary l2gmap and IS for coarse indexes */ 4017 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4018 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4019 4020 /* creates temporary MATIS object for coarse matrix */ 4021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4022 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4023 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4024 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4025 #if 0 4026 { 4027 PetscViewer viewer; 4028 char filename[256]; 4029 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4030 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4031 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4032 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4033 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4034 } 4035 #endif 4036 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4037 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4038 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4039 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4040 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4041 4042 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4043 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4044 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4045 const PetscInt *idxs; 4046 ISLocalToGlobalMapping tmap; 4047 4048 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4049 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4050 /* allocate space for temporary storage */ 4051 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4052 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4053 /* allocate for IS array */ 4054 nisdofs = pcbddc->n_ISForDofsLocal; 4055 nisneu = !!pcbddc->NeumannBoundariesLocal; 4056 nis = nisdofs + nisneu; 4057 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4058 /* dofs splitting */ 4059 for (i=0;i<nisdofs;i++) { 4060 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4061 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4062 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4063 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4064 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4065 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4066 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4067 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4068 } 4069 /* neumann boundaries */ 4070 if (pcbddc->NeumannBoundariesLocal) { 4071 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4072 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4073 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4074 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4075 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4076 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4077 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4078 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4079 } 4080 /* free memory */ 4081 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4082 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4083 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4084 } else { 4085 nis = 0; 4086 nisdofs = 0; 4087 nisneu = 0; 4088 isarray = NULL; 4089 } 4090 /* destroy no longer needed map */ 4091 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4092 4093 /* restrict on coarse candidates (if needed) */ 4094 coarse_mat_is = NULL; 4095 if (csin) { 4096 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4097 if (redist) { 4098 PetscMPIInt rank; 4099 PetscInt spc,n_spc_p1,dest[1],destsize; 4100 4101 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4102 spc = active_procs/ncoarse; 4103 n_spc_p1 = active_procs%ncoarse; 4104 if (im_active) { 4105 destsize = 1; 4106 if (rank > n_spc_p1*(spc+1)-1) { 4107 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4108 } else { 4109 dest[0] = rank/(spc+1); 4110 } 4111 } else { 4112 destsize = 0; 4113 } 4114 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4115 } else if (csin_type_simple) { 4116 PetscMPIInt rank; 4117 PetscInt issize,isidx; 4118 4119 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4120 if (im_active) { 4121 issize = 1; 4122 isidx = (PetscInt)rank; 4123 } else { 4124 issize = 0; 4125 isidx = -1; 4126 } 4127 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4128 } else { /* get a suitable subassembling pattern from MATIS code */ 4129 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4130 } 4131 4132 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4133 if (!redist || ncoarse <= void_procs) { 4134 PetscInt ncoarse_cand,tissize,*nisindices; 4135 PetscInt *coarse_candidates; 4136 const PetscInt* tisindices; 4137 4138 /* get coarse candidates' ranks in pc communicator */ 4139 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4140 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4141 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4142 if (!coarse_candidates[i]) { 4143 coarse_candidates[ncoarse_cand++]=i; 4144 } 4145 } 4146 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4147 4148 4149 if (pcbddc->dbg_flag) { 4150 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4152 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4153 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4154 for (i=0;i<ncoarse_cand;i++) { 4155 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4156 } 4157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4158 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4159 } 4160 /* shift the pattern on coarse candidates */ 4161 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4162 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4163 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4164 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4165 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4166 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4167 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4168 } 4169 if (pcbddc->dbg_flag) { 4170 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4172 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4173 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4174 } 4175 } 4176 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4177 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4178 } else { 4179 if (pcbddc->dbg_flag) { 4180 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4181 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4183 } 4184 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4185 coarse_mat_is = t_coarse_mat_is; 4186 } 4187 4188 /* create local to global scatters for coarse problem */ 4189 if (compute_vecs) { 4190 PetscInt lrows; 4191 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4192 if (coarse_mat_is) { 4193 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4194 } else { 4195 lrows = 0; 4196 } 4197 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4198 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4199 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4200 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4201 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4202 } 4203 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4204 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4205 4206 /* set defaults for coarse KSP and PC */ 4207 if (multilevel_allowed) { 4208 coarse_ksp_type = KSPRICHARDSON; 4209 coarse_pc_type = PCBDDC; 4210 } else { 4211 coarse_ksp_type = KSPPREONLY; 4212 coarse_pc_type = PCREDUNDANT; 4213 } 4214 4215 /* print some info if requested */ 4216 if (pcbddc->dbg_flag) { 4217 if (!multilevel_allowed) { 4218 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4219 if (multilevel_requested) { 4220 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4221 } else if (pcbddc->max_levels) { 4222 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4223 } 4224 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4225 } 4226 } 4227 4228 /* create the coarse KSP object only once with defaults */ 4229 if (coarse_mat_is) { 4230 MatReuse coarse_mat_reuse; 4231 PetscViewer dbg_viewer = NULL; 4232 if (pcbddc->dbg_flag) { 4233 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4234 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4235 } 4236 if (!pcbddc->coarse_ksp) { 4237 char prefix[256],str_level[16]; 4238 size_t len; 4239 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4240 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4241 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4242 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4243 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4244 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4245 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4246 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4247 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4248 /* prefix */ 4249 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4250 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4251 if (!pcbddc->current_level) { 4252 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4253 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4254 } else { 4255 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4256 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4257 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4258 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4259 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4260 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4261 } 4262 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4263 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4264 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4265 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4266 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4267 /* allow user customization */ 4268 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4269 } 4270 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4271 if (nisdofs) { 4272 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4273 for (i=0;i<nisdofs;i++) { 4274 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4275 } 4276 } 4277 if (nisneu) { 4278 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4279 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4280 } 4281 4282 /* get some info after set from options */ 4283 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4284 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4285 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4286 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4287 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4288 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4289 isbddc = PETSC_FALSE; 4290 } 4291 if (isredundant) { 4292 KSP inner_ksp; 4293 PC inner_pc; 4294 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4295 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4296 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4297 } 4298 4299 /* assemble coarse matrix */ 4300 if (coarse_reuse) { 4301 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4302 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4303 coarse_mat_reuse = MAT_REUSE_MATRIX; 4304 } else { 4305 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4306 } 4307 if (isbddc || isnn) { 4308 if (pcbddc->coarsening_ratio > 1) { 4309 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4310 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4311 if (pcbddc->dbg_flag) { 4312 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4313 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4314 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4315 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4316 } 4317 } 4318 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4319 } else { 4320 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4321 coarse_mat = coarse_mat_is; 4322 } 4323 } else { 4324 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4325 } 4326 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4327 4328 /* propagate symmetry info to coarse matrix */ 4329 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4330 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4331 4332 /* set operators */ 4333 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4334 if (pcbddc->dbg_flag) { 4335 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4336 } 4337 } else { /* processes non partecipating to coarse solver (if any) */ 4338 coarse_mat = 0; 4339 } 4340 ierr = PetscFree(isarray);CHKERRQ(ierr); 4341 #if 0 4342 { 4343 PetscViewer viewer; 4344 char filename[256]; 4345 sprintf(filename,"coarse_mat.m"); 4346 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4347 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4348 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4349 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4350 } 4351 #endif 4352 4353 /* Compute coarse null space (special handling by BDDC only) */ 4354 if (pcbddc->NullSpace) { 4355 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4356 } 4357 4358 if (pcbddc->coarse_ksp) { 4359 Vec crhs,csol; 4360 PetscBool ispreonly; 4361 if (CoarseNullSpace) { 4362 if (isbddc) { 4363 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4364 } else { 4365 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4366 } 4367 } 4368 /* setup coarse ksp */ 4369 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4370 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4371 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4372 /* hack */ 4373 if (!csol) { 4374 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4375 } 4376 if (!crhs) { 4377 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4378 } 4379 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4380 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4381 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4382 KSP check_ksp; 4383 KSPType check_ksp_type; 4384 PC check_pc; 4385 Vec check_vec,coarse_vec; 4386 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4387 PetscInt its; 4388 PetscBool compute_eigs; 4389 PetscReal *eigs_r,*eigs_c; 4390 PetscInt neigs; 4391 const char *prefix; 4392 4393 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4394 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4395 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4396 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4397 if (ispreonly) { 4398 check_ksp_type = KSPPREONLY; 4399 compute_eigs = PETSC_FALSE; 4400 } else { 4401 check_ksp_type = KSPGMRES; 4402 compute_eigs = PETSC_TRUE; 4403 } 4404 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4405 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4406 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4407 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4408 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4409 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4410 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4411 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4412 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4413 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4414 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4415 /* create random vec */ 4416 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4417 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4418 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4419 if (CoarseNullSpace) { 4420 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4421 } 4422 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4423 /* solve coarse problem */ 4424 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4425 if (CoarseNullSpace) { 4426 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4427 } 4428 /* set eigenvalue estimation if preonly has not been requested */ 4429 if (compute_eigs) { 4430 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4431 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4432 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4433 lambda_max = eigs_r[neigs-1]; 4434 lambda_min = eigs_r[0]; 4435 if (pcbddc->use_coarse_estimates) { 4436 if (lambda_max>lambda_min) { 4437 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4438 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4439 } 4440 } 4441 } 4442 4443 /* check coarse problem residual error */ 4444 if (pcbddc->dbg_flag) { 4445 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4446 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4447 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4448 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4449 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4450 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4451 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4452 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4453 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4454 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4455 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4457 if (compute_eigs) { 4458 PetscReal lambda_max_s,lambda_min_s; 4459 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4460 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4461 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4462 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4463 for (i=0;i<neigs;i++) { 4464 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4465 } 4466 } 4467 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4468 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4469 } 4470 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4471 if (compute_eigs) { 4472 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4473 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4474 } 4475 } 4476 } 4477 /* print additional info */ 4478 if (pcbddc->dbg_flag) { 4479 /* waits until all processes reaches this point */ 4480 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4483 } 4484 4485 /* free memory */ 4486 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4487 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4488 PetscFunctionReturn(0); 4489 } 4490 4491 #undef __FUNCT__ 4492 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4493 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4494 { 4495 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4496 PC_IS* pcis = (PC_IS*)pc->data; 4497 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4498 PetscInt i,coarse_size=0; 4499 PetscInt *local_primal_indices=NULL; 4500 PetscErrorCode ierr; 4501 4502 PetscFunctionBegin; 4503 /* Compute global number of coarse dofs */ 4504 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4505 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4506 } 4507 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4508 4509 /* check numbering */ 4510 if (pcbddc->dbg_flag) { 4511 PetscScalar coarsesum,*array; 4512 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4513 4514 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4516 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4517 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4518 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4519 for (i=0;i<pcbddc->local_primal_size;i++) { 4520 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4521 } 4522 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4523 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4524 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4525 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4526 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4527 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4528 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4529 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4530 for (i=0;i<pcis->n;i++) { 4531 if (array[i] == 1.0) { 4532 set_error = PETSC_TRUE; 4533 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4534 } 4535 } 4536 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4537 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4538 for (i=0;i<pcis->n;i++) { 4539 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4540 } 4541 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4542 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4543 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4544 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4545 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4547 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4548 PetscInt *gidxs; 4549 4550 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4551 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4552 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4553 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4555 for (i=0;i<pcbddc->local_primal_size;i++) { 4556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4557 } 4558 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4559 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4560 } 4561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4562 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4563 } 4564 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4565 /* get back data */ 4566 *coarse_size_n = coarse_size; 4567 *local_primal_indices_n = local_primal_indices; 4568 PetscFunctionReturn(0); 4569 } 4570 4571 #undef __FUNCT__ 4572 #define __FUNCT__ "PCBDDCGlobalToLocal" 4573 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4574 { 4575 IS localis_t; 4576 PetscInt i,lsize,*idxs,n; 4577 PetscScalar *vals; 4578 PetscErrorCode ierr; 4579 4580 PetscFunctionBegin; 4581 /* get indices in local ordering exploiting local to global map */ 4582 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4583 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4584 for (i=0;i<lsize;i++) vals[i] = 1.0; 4585 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4586 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4587 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4588 if (idxs) { /* multilevel guard */ 4589 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4590 } 4591 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4592 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4593 ierr = PetscFree(vals);CHKERRQ(ierr); 4594 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4595 /* now compute set in local ordering */ 4596 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4597 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4598 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4599 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4600 for (i=0,lsize=0;i<n;i++) { 4601 if (PetscRealPart(vals[i]) > 0.5) { 4602 lsize++; 4603 } 4604 } 4605 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4606 for (i=0,lsize=0;i<n;i++) { 4607 if (PetscRealPart(vals[i]) > 0.5) { 4608 idxs[lsize++] = i; 4609 } 4610 } 4611 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4612 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4613 *localis = localis_t; 4614 PetscFunctionReturn(0); 4615 } 4616 4617 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4618 #undef __FUNCT__ 4619 #define __FUNCT__ "PCBDDCMatMult_Private" 4620 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4621 { 4622 PCBDDCChange_ctx change_ctx; 4623 PetscErrorCode ierr; 4624 4625 PetscFunctionBegin; 4626 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4627 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4628 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4629 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4630 PetscFunctionReturn(0); 4631 } 4632 4633 #undef __FUNCT__ 4634 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4635 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4636 { 4637 PCBDDCChange_ctx change_ctx; 4638 PetscErrorCode ierr; 4639 4640 PetscFunctionBegin; 4641 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4642 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4643 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4644 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4645 PetscFunctionReturn(0); 4646 } 4647 4648 #undef __FUNCT__ 4649 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4650 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4651 { 4652 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4653 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4654 PetscInt *used_xadj,*used_adjncy; 4655 PetscBool free_used_adj; 4656 PetscErrorCode ierr; 4657 4658 PetscFunctionBegin; 4659 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4660 free_used_adj = PETSC_FALSE; 4661 if (pcbddc->sub_schurs_layers == -1) { 4662 used_xadj = NULL; 4663 used_adjncy = NULL; 4664 } else { 4665 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4666 used_xadj = pcbddc->mat_graph->xadj; 4667 used_adjncy = pcbddc->mat_graph->adjncy; 4668 } else if (pcbddc->computed_rowadj) { 4669 used_xadj = pcbddc->mat_graph->xadj; 4670 used_adjncy = pcbddc->mat_graph->adjncy; 4671 } else { 4672 PetscBool flg_row=PETSC_FALSE; 4673 const PetscInt *xadj,*adjncy; 4674 PetscInt nvtxs; 4675 4676 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4677 if (flg_row) { 4678 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4679 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4680 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4681 free_used_adj = PETSC_TRUE; 4682 } else { 4683 pcbddc->sub_schurs_layers = -1; 4684 used_xadj = NULL; 4685 used_adjncy = NULL; 4686 } 4687 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4688 } 4689 } 4690 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4691 4692 /* free adjacency */ 4693 if (free_used_adj) { 4694 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4695 } 4696 PetscFunctionReturn(0); 4697 } 4698 4699 #undef __FUNCT__ 4700 #define __FUNCT__ "PCBDDCInitSubSchurs" 4701 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4702 { 4703 PC_IS *pcis=(PC_IS*)pc->data; 4704 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4705 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4706 PCBDDCGraph graph; 4707 Mat S_j; 4708 PetscErrorCode ierr; 4709 4710 PetscFunctionBegin; 4711 /* attach interface graph for determining subsets */ 4712 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4713 IS verticesIS; 4714 4715 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4716 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4717 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4718 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4719 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4720 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4721 /* 4722 if (pcbddc->dbg_flag) { 4723 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4724 } 4725 */ 4726 } else { 4727 graph = pcbddc->mat_graph; 4728 } 4729 4730 /* Create Schur complement matrix */ 4731 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4732 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4733 4734 /* sub_schurs init */ 4735 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4736 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4737 /* free graph struct */ 4738 if (pcbddc->sub_schurs_rebuild) { 4739 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4740 } 4741 PetscFunctionReturn(0); 4742 } 4743