1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscScalar *Smult,*Seigv; 20 PetscReal *eigs,thresh; 21 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1; 60 nmax = PetscMax(nmin,nmax); 61 62 /* allocate lapack workspace */ 63 cum = cum2 = 0; 64 maxneigs = 0; 65 for (i=0;i<sub_schurs->n_subs;i++) { 66 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 67 PetscInt n,subset_size; 68 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 69 n = PetscMin(subset_size,nmax); 70 cum += subset_size*n; 71 cum2 += n; 72 maxneigs = PetscMax(maxneigs,n); 73 } 74 } 75 76 if (mss) { 77 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 78 PetscBLASInt B_itype = 1; 79 PetscBLASInt B_N = mss; 80 PetscReal zero = 0.0; 81 PetscReal eps = 0.0; /* dlamch? */ 82 83 B_lwork = -1; 84 S = NULL; 85 St = NULL; 86 eigs = NULL; 87 eigv = NULL; 88 B_iwork = NULL; 89 B_ifail = NULL; 90 thresh = 1.0; 91 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 92 #if defined(PETSC_USE_COMPLEX) 93 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 94 #else 95 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 96 #endif 97 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 98 ierr = PetscFPTrapPop();CHKERRQ(ierr); 99 } else { 100 /* TODO */ 101 } 102 } else { 103 lwork = 0; 104 } 105 106 nv = 0; 107 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 108 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 109 } 110 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 111 ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs, 112 B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 113 #if defined(PETSC_USE_COMPLEX) 114 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 115 #endif 116 ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr); 117 ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 118 nv+cum2+1,&pcbddc->adaptive_constraints_ptrs, 119 nv+cum,&pcbddc->adaptive_constraints_idxs, 120 nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 121 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 122 123 maxneigs = 0; 124 cum = cum2 = cumarray = 0; 125 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 126 const PetscInt *idxs; 127 128 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 129 for (cum=0;cum<nv;cum++) { 130 pcbddc->adaptive_constraints_n[cum] = 1; 131 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 132 pcbddc->adaptive_constraints_ptrs[cum] = cum; 133 pcbddc->adaptive_constraints_data[cum] = 1.0; 134 } 135 cum2 = cum; 136 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 } 138 139 if (mss) { /* multilevel */ 140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 141 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 142 } 143 144 for (i=0;i<sub_schurs->n_subs;i++) { 145 PetscInt j,subset_size; 146 147 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 148 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 149 const PetscInt *idxs; 150 PetscScalar one = 1.0,scalar_zero = 0.0; 151 PetscReal zero=0.0; 152 PetscBLASInt B_N; 153 154 /* S should be copied since we need it for deluxe scaling */ 155 if (sub_schurs->is_hermitian) { 156 PetscInt j; 157 for (j=0;j<subset_size;j++) { 158 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 159 } 160 for (j=0;j<subset_size;j++) { 161 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 162 } 163 } else { 164 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 165 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 166 } 167 /* is always this the right matrix? */ 168 ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 169 170 /* we could reuse space already allocated when building sum_S_Ej_tilda_all */ 171 /* St = Starray+cumarray; */ 172 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 173 if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */ 174 thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold); 175 } else { 176 thresh = 1.0; 177 } 178 179 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 180 PetscBLASInt B_itype = 1; 181 PetscBLASInt B_IL = 1, B_IU; 182 PetscReal eps = -1.0; /* dlamch? */ 183 PetscInt nmin_s; 184 185 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 186 /* ask for eigenvalues lower than thresh */ 187 if (pcbddc->dbg_flag) { 188 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 189 } 190 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 191 #if defined(PETSC_USE_COMPLEX) 192 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 193 #else 194 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 195 #endif 196 ierr = PetscFPTrapPop();CHKERRQ(ierr); 197 if (B_ierr) { 198 if (B_ierr < 0 ) { 199 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 200 } else if (B_ierr <= B_N) { 201 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 202 } else { 203 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 204 } 205 } 206 207 if (B_neigs > nmax) { 208 if (pcbddc->dbg_flag) { 209 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 210 } 211 B_neigs = nmax; 212 } 213 214 nmin_s = PetscMin(nmin,B_N); 215 if (B_neigs < nmin_s) { 216 PetscBLASInt B_neigs2; 217 218 B_IL = B_neigs + 1; 219 ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr); 220 if (pcbddc->dbg_flag) { 221 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 222 } 223 if (sub_schurs->is_hermitian) { 224 PetscInt j; 225 for (j=0;j<subset_size;j++) { 226 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 227 } 228 for (j=0;j<subset_size;j++) { 229 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 230 } 231 } else { 232 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 233 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 236 #if defined(PETSC_USE_COMPLEX) 237 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 238 #else 239 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 240 #endif 241 ierr = PetscFPTrapPop();CHKERRQ(ierr); 242 B_neigs += B_neigs2; 243 } 244 if (B_ierr) { 245 if (B_ierr < 0 ) { 246 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 247 } else if (B_ierr <= B_N) { 248 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 249 } else { 250 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 251 } 252 } 253 if (pcbddc->dbg_flag) { 254 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 255 for (j=0;j<B_neigs;j++) { 256 if (eigs[j] == 0.0) { 257 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 258 } else { 259 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1.0/eigs[j]); 260 } 261 } 262 } 263 } else { 264 /* TODO */ 265 } 266 267 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 268 if (B_neigs) { 269 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N)); 270 } 271 maxneigs = PetscMax(B_neigs,maxneigs); 272 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 273 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 274 275 if (pcbddc->dbg_flag > 1) { 276 PetscInt ii; 277 for (ii=0;ii<B_neigs;ii++) { 278 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 279 for (j=0;j<B_N;j++) { 280 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]); 281 } 282 } 283 } 284 for (j=0;j<B_neigs;j++) { 285 #if 0 286 { 287 PetscBLASInt Blas_N,Blas_one = 1.0; 288 PetscScalar norm; 289 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 290 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 291 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 292 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 293 } else { 294 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 295 } 296 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one)); 297 } 298 #endif 299 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 300 pcbddc->adaptive_constraints_ptrs[cum++] = cum2; 301 cum2 += subset_size; 302 } 303 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 304 } 305 /* shift for next computation */ 306 cumarray += subset_size*subset_size; 307 } 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 } 311 pcbddc->adaptive_constraints_ptrs[cum] = cum2; 312 ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr); 313 314 if (mss) { 315 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 316 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 317 } 318 ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 319 #if defined(PETSC_USE_COMPLEX) 320 ierr = PetscFree(rwork);CHKERRQ(ierr); 321 #endif 322 if (pcbddc->dbg_flag) { 323 PetscInt maxneigs_r; 324 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 325 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 326 } 327 PetscFunctionReturn(0); 328 } 329 330 #undef __FUNCT__ 331 #define __FUNCT__ "PCBDDCSetUpSolvers" 332 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 333 { 334 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 335 PetscScalar *coarse_submat_vals; 336 PetscErrorCode ierr; 337 338 PetscFunctionBegin; 339 /* Setup local scatters R_to_B and (optionally) R_to_D */ 340 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 341 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 342 343 /* Setup local neumann solver ksp_R */ 344 /* PCBDDCSetUpLocalScatters should be called first! */ 345 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 346 347 /* Change global null space passed in by the user if change of basis has been requested */ 348 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 349 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 350 } 351 352 /* 353 Setup local correction and local part of coarse basis. 354 Gives back the dense local part of the coarse matrix in column major ordering 355 */ 356 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 357 358 /* Compute total number of coarse nodes and setup coarse solver */ 359 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 360 361 /* free */ 362 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "PCBDDCResetCustomization" 368 PetscErrorCode PCBDDCResetCustomization(PC pc) 369 { 370 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 375 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 376 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 377 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 378 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 379 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 380 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 381 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 382 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 383 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 384 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 #undef __FUNCT__ 389 #define __FUNCT__ "PCBDDCResetTopography" 390 PetscErrorCode PCBDDCResetTopography(PC pc) 391 { 392 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 397 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 398 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 399 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 400 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "PCBDDCResetSolvers" 406 PetscErrorCode PCBDDCResetSolvers(PC pc) 407 { 408 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 409 PetscScalar *array; 410 PetscErrorCode ierr; 411 412 PetscFunctionBegin; 413 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 414 if (pcbddc->coarse_phi_B) { 415 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 416 ierr = PetscFree(array);CHKERRQ(ierr); 417 } 418 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 419 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 420 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 421 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 422 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 423 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 424 if (pcbddc->local_auxmat2) { 425 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 426 ierr = PetscFree(array);CHKERRQ(ierr); 427 } 428 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 430 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 432 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 433 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 434 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 435 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 436 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 437 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 438 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 439 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 440 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 441 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 442 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 443 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 449 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 450 { 451 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 452 PC_IS *pcis = (PC_IS*)pc->data; 453 VecType impVecType; 454 PetscInt n_constraints,n_R,old_size; 455 PetscErrorCode ierr; 456 457 PetscFunctionBegin; 458 if (!pcbddc->ConstraintMatrix) { 459 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 460 } 461 /* get sizes */ 462 n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices; 463 n_R = pcis->n-pcbddc->n_actual_vertices; 464 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 465 /* local work vectors (try to avoid unneeded work)*/ 466 /* R nodes */ 467 old_size = -1; 468 if (pcbddc->vec1_R) { 469 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 470 } 471 if (n_R != old_size) { 472 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 473 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 474 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 475 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 476 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 477 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 478 } 479 /* local primal dofs */ 480 old_size = -1; 481 if (pcbddc->vec1_P) { 482 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 483 } 484 if (pcbddc->local_primal_size != old_size) { 485 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 486 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 487 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 488 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 489 } 490 /* local explicit constraints */ 491 old_size = -1; 492 if (pcbddc->vec1_C) { 493 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 494 } 495 if (n_constraints && n_constraints != old_size) { 496 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 497 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 498 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 499 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 500 } 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "PCBDDCSetUpCorrection" 506 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 507 { 508 PetscErrorCode ierr; 509 /* pointers to pcis and pcbddc */ 510 PC_IS* pcis = (PC_IS*)pc->data; 511 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 512 /* submatrices of local problem */ 513 Mat A_RV,A_VR,A_VV; 514 /* submatrices of local coarse problem */ 515 Mat S_VV,S_CV,S_VC,S_CC; 516 /* working matrices */ 517 Mat C_CR; 518 /* additional working stuff */ 519 PC pc_R; 520 Mat F; 521 PetscBool isLU,isCHOL,isILU; 522 523 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 524 PetscScalar *work; 525 PetscInt *idx_V_B; 526 PetscInt n,n_vertices,n_constraints; 527 PetscInt i,n_R,n_D,n_B; 528 PetscBool unsymmetric_check; 529 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 530 MatType impMatType; 531 /* some shortcuts to scalars */ 532 PetscScalar one=1.0,m_one=-1.0; 533 534 PetscFunctionBegin; 535 /* get number of vertices (corners plus constraints with change of basis) 536 pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */ 537 n_vertices = pcbddc->n_actual_vertices; 538 n_constraints = pcbddc->local_primal_size-n_vertices; 539 /* Set Non-overlapping dimensions */ 540 n_B = pcis->n_B; n_D = pcis->n - n_B; 541 n_R = pcis->n-n_vertices; 542 543 /* Set types for local objects needed by BDDC precondtioner */ 544 impMatType = MATSEQDENSE; 545 546 /* vertices in boundary numbering */ 547 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 548 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 549 if (i != n_vertices) { 550 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 551 } 552 553 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 554 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 555 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 556 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 557 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 558 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 559 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 560 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 562 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 563 564 unsymmetric_check = PETSC_FALSE; 565 /* allocate workspace */ 566 n = 0; 567 if (n_constraints) { 568 n += n_R*n_constraints; 569 } 570 if (n_vertices) { 571 n = PetscMax(2*n_R*n_vertices,n); 572 } 573 if (!pcbddc->issym) { 574 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 575 unsymmetric_check = PETSC_TRUE; 576 } 577 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 578 579 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 580 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 581 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 582 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 583 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 584 if (isLU || isILU || isCHOL) { 585 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 586 } else { 587 F = NULL; 588 } 589 590 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 591 if (n_constraints) { 592 Mat M1,M2,M3; 593 IS is_aux; 594 /* see if we can save some allocations */ 595 if (pcbddc->local_auxmat2) { 596 PetscInt on_R,on_constraints; 597 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 598 if (on_R != n_R || on_constraints != n_constraints) { 599 PetscScalar *marray; 600 601 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 602 ierr = PetscFree(marray);CHKERRQ(ierr); 603 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 605 } 606 } 607 /* auxiliary matrices */ 608 if (!pcbddc->local_auxmat2) { 609 PetscScalar *marray; 610 611 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 marray += n_R*n_constraints; 614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 615 } 616 617 /* Extract constraints on R nodes: C_{CR} */ 618 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 619 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 620 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 621 622 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 623 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 624 for (i=0;i<n_constraints;i++) { 625 const PetscScalar *row_cmat_values; 626 const PetscInt *row_cmat_indices; 627 PetscInt size_of_constraint,j; 628 629 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 630 for (j=0;j<size_of_constraint;j++) { 631 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 632 } 633 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 634 } 635 if (F) { 636 Mat B; 637 638 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 639 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 640 ierr = MatDestroy(&B);CHKERRQ(ierr); 641 } else { 642 PetscScalar *xarray; 643 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 644 for (i=0;i<n_constraints;i++) { 645 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 646 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 647 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 650 } 651 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 652 } 653 654 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 655 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 656 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 657 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 658 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 659 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 660 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 661 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 662 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 663 ierr = MatDestroy(&M2);CHKERRQ(ierr); 664 ierr = MatDestroy(&M3);CHKERRQ(ierr); 665 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 666 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 667 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 668 ierr = MatDestroy(&M1);CHKERRQ(ierr); 669 } 670 /* Get submatrices from subdomain matrix */ 671 if (n_vertices) { 672 Mat newmat; 673 IS is_aux; 674 PetscInt ibs,mbs; 675 PetscBool issbaij; 676 677 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 678 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 679 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 680 if (ibs != mbs) { /* need to convert to SEQAIJ */ 681 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 682 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 683 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 684 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 685 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 686 } else { 687 /* this is safe */ 688 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 690 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 691 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 692 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 693 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 694 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 695 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 696 } else { 697 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 698 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 699 } 700 } 701 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 702 } 703 704 /* Matrix of coarse basis functions (local) */ 705 if (pcbddc->coarse_phi_B) { 706 PetscInt on_B,on_primal,on_D=n_D; 707 if (pcbddc->coarse_phi_D) { 708 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 709 } 710 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 711 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 712 PetscScalar *marray; 713 714 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 715 ierr = PetscFree(marray);CHKERRQ(ierr); 716 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 717 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 718 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 719 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 720 } 721 } 722 723 if (!pcbddc->coarse_phi_B) { 724 PetscScalar *marray; 725 726 n = n_B*pcbddc->local_primal_size; 727 if (pcbddc->switch_static || pcbddc->dbg_flag) { 728 n += n_D*pcbddc->local_primal_size; 729 } 730 if (!pcbddc->issym) { 731 n *= 2; 732 } 733 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 734 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 735 n = n_B*pcbddc->local_primal_size; 736 if (pcbddc->switch_static || pcbddc->dbg_flag) { 737 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 738 n += n_D*pcbddc->local_primal_size; 739 } 740 if (!pcbddc->issym) { 741 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 742 if (pcbddc->switch_static || pcbddc->dbg_flag) { 743 n = n_B*pcbddc->local_primal_size; 744 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 745 } 746 } else { 747 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 748 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 749 if (pcbddc->switch_static || pcbddc->dbg_flag) { 750 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 751 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 752 } 753 } 754 } 755 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 756 /* vertices */ 757 if (n_vertices) { 758 759 if (n_R) { 760 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 761 PetscBLASInt B_N,B_one = 1; 762 PetscScalar *x,*y; 763 764 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 766 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 767 if (F) { 768 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 769 } else { 770 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 771 for (i=0;i<n_vertices;i++) { 772 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 773 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 774 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 775 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 776 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 777 } 778 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 779 } 780 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 781 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 782 if (n_constraints) { 783 Mat B; 784 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 786 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 787 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 788 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 789 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 790 ierr = MatDestroy(&B);CHKERRQ(ierr); 791 } 792 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 793 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 794 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 795 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 796 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 797 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 798 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 799 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 800 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 801 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 802 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 803 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 804 } else { 805 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 806 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 807 } 808 /* coarse basis functions */ 809 for (i=0;i<n_vertices;i++) { 810 PetscScalar *y; 811 812 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 813 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 814 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 815 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 816 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 817 y[n_B*i+idx_V_B[i]] = 1.0; 818 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 819 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 820 821 if (pcbddc->switch_static || pcbddc->dbg_flag) { 822 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 823 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 824 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 827 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 828 } 829 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 830 } 831 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 832 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 833 } 834 835 if (n_constraints) { 836 Mat B; 837 838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 839 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 840 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 841 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 842 if (n_vertices) { 843 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 844 } 845 ierr = MatDestroy(&B);CHKERRQ(ierr); 846 /* coarse basis functions */ 847 for (i=0;i<n_constraints;i++) { 848 PetscScalar *y; 849 850 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 851 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 852 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 853 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 854 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 855 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 856 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 857 858 if (pcbddc->switch_static || pcbddc->dbg_flag) { 859 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 860 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 864 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 865 } 866 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 867 } 868 } 869 870 /* compute other basis functions for non-symmetric problems */ 871 if (!pcbddc->issym) { 872 Mat B,X; 873 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 875 876 if (n_constraints) { 877 Mat S_CCT,B_C; 878 879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 880 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 881 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 882 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 883 if (n_vertices) { 884 Mat B_V,S_VCT; 885 886 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 887 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 888 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 889 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 891 } 892 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 893 } 894 if (n_vertices && n_R) { 895 Mat A_VRT; 896 PetscBLASInt B_N,B_one = 1; 897 898 if (!n_constraints) { /* if there are no constraints, reset work */ 899 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 900 } 901 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 902 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 903 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 904 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 905 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 906 } 907 908 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 909 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 910 for (i=0;i<pcbddc->local_primal_size;i++) { 911 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 912 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 913 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 914 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 915 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 916 } 917 } else { 918 for (i=0;i<pcbddc->local_primal_size;i++) { 919 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 921 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 923 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 924 } 925 } 926 ierr = MatDestroy(&B);CHKERRQ(ierr); 927 /* coarse basis functions */ 928 for (i=0;i<pcbddc->local_primal_size;i++) { 929 PetscScalar *y; 930 931 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 932 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 933 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 934 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 if (i<n_vertices) { 937 y[n_B*i+idx_V_B[i]] = 1.0; 938 } 939 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 940 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 941 942 if (pcbddc->switch_static || pcbddc->dbg_flag) { 943 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 944 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 945 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 948 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 949 } 950 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 951 } 952 ierr = MatDestroy(&X);CHKERRQ(ierr); 953 } 954 955 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 956 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 957 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 958 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 959 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 960 /* Checking coarse_sub_mat and coarse basis functios */ 961 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 962 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 963 if (pcbddc->dbg_flag) { 964 Mat coarse_sub_mat; 965 Mat AUXMAT,TM1,TM2,TM3,TM4; 966 Mat coarse_phi_D,coarse_phi_B; 967 Mat coarse_psi_D,coarse_psi_B; 968 Mat A_II,A_BB,A_IB,A_BI; 969 Mat C_B,CPHI; 970 IS is_dummy; 971 Vec mones; 972 MatType checkmattype=MATSEQAIJ; 973 PetscReal real_value; 974 975 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 976 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 977 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 978 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 979 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 980 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 981 if (unsymmetric_check) { 982 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 983 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 984 } 985 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 986 987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 990 if (unsymmetric_check) { 991 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 992 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 993 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 994 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 995 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 996 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 997 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 998 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 999 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1001 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1002 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1003 } else { 1004 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1005 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1006 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1007 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1010 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1011 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1012 } 1013 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1014 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1015 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1016 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1017 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1018 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1019 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1020 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1021 1022 /* check constraints */ 1023 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1024 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1025 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1026 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1027 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1028 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1029 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1030 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1031 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1032 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1033 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1034 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1035 1036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1037 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1038 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1039 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1040 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1041 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1042 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1043 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1044 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1045 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1046 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1047 if (unsymmetric_check) { 1048 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1049 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1050 } 1051 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1052 } 1053 1054 /* free memory */ 1055 ierr = PetscFree(work);CHKERRQ(ierr); 1056 if (n_vertices) { 1057 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1058 } 1059 if (n_constraints) { 1060 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1061 } 1062 /* get back data */ 1063 *coarse_submat_vals_n = coarse_submat_vals; 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1069 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1070 { 1071 Mat *work_mat; 1072 IS isrow_s,iscol_s; 1073 PetscBool rsorted,csorted; 1074 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1075 PetscErrorCode ierr; 1076 1077 PetscFunctionBegin; 1078 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1079 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1080 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1081 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1082 1083 if (!rsorted) { 1084 const PetscInt *idxs; 1085 PetscInt *idxs_sorted,i; 1086 1087 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1088 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1089 for (i=0;i<rsize;i++) { 1090 idxs_perm_r[i] = i; 1091 } 1092 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1093 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1094 for (i=0;i<rsize;i++) { 1095 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1096 } 1097 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1098 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1099 } else { 1100 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1101 isrow_s = isrow; 1102 } 1103 1104 if (!csorted) { 1105 if (isrow == iscol) { 1106 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1107 iscol_s = isrow_s; 1108 } else { 1109 const PetscInt *idxs; 1110 PetscInt *idxs_sorted,i; 1111 1112 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1113 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1114 for (i=0;i<csize;i++) { 1115 idxs_perm_c[i] = i; 1116 } 1117 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1119 for (i=0;i<csize;i++) { 1120 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1121 } 1122 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1123 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1124 } 1125 } else { 1126 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1127 iscol_s = iscol; 1128 } 1129 1130 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1131 1132 if (!rsorted || !csorted) { 1133 Mat new_mat; 1134 IS is_perm_r,is_perm_c; 1135 1136 if (!rsorted) { 1137 PetscInt *idxs_r,i; 1138 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1139 for (i=0;i<rsize;i++) { 1140 idxs_r[idxs_perm_r[i]] = i; 1141 } 1142 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1143 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1144 } else { 1145 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1146 } 1147 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1148 1149 if (!csorted) { 1150 if (isrow_s == iscol_s) { 1151 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1152 is_perm_c = is_perm_r; 1153 } else { 1154 PetscInt *idxs_c,i; 1155 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1156 for (i=0;i<csize;i++) { 1157 idxs_c[idxs_perm_c[i]] = i; 1158 } 1159 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1160 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1161 } 1162 } else { 1163 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1164 } 1165 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1166 1167 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1168 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1169 work_mat[0] = new_mat; 1170 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1171 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1172 } 1173 1174 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1175 *B = work_mat[0]; 1176 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1177 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1178 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1184 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1185 { 1186 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1187 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1188 Mat new_mat; 1189 IS is_local,is_global; 1190 PetscInt local_size; 1191 PetscBool isseqaij; 1192 PetscErrorCode ierr; 1193 1194 PetscFunctionBegin; 1195 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1196 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1197 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1198 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1199 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1200 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1201 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1202 1203 /* check */ 1204 if (pcbddc->dbg_flag) { 1205 Vec x,x_change; 1206 PetscReal error; 1207 1208 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1209 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1210 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1211 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1213 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1214 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1216 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1217 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1220 ierr = VecDestroy(&x);CHKERRQ(ierr); 1221 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1222 } 1223 1224 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1225 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1226 if (isseqaij) { 1227 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1228 } else { 1229 Mat work_mat; 1230 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1231 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1232 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1233 } 1234 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1235 #if !defined(PETSC_USE_COMPLEX) 1236 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1237 #endif 1238 /* 1239 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1240 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1241 */ 1242 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1248 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1249 { 1250 PC_IS* pcis = (PC_IS*)(pc->data); 1251 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1252 IS is_aux1,is_aux2; 1253 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1254 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1255 PetscInt vbs,bs; 1256 PetscBT bitmask; 1257 PetscErrorCode ierr; 1258 1259 PetscFunctionBegin; 1260 /* 1261 No need to setup local scatters if 1262 - primal space is unchanged 1263 AND 1264 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1265 AND 1266 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1267 */ 1268 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1269 PetscFunctionReturn(0); 1270 } 1271 /* destroy old objects */ 1272 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1273 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1274 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1275 /* Set Non-overlapping dimensions */ 1276 n_B = pcis->n_B; n_D = pcis->n - n_B; 1277 n_vertices = pcbddc->n_actual_vertices; 1278 /* create auxiliary bitmask */ 1279 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1280 for (i=0;i<n_vertices;i++) { 1281 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1282 } 1283 1284 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1285 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1286 for (i=0, n_R=0; i<pcis->n; i++) { 1287 if (!PetscBTLookup(bitmask,i)) { 1288 idx_R_local[n_R] = i; 1289 n_R++; 1290 } 1291 } 1292 1293 /* Block code */ 1294 vbs = 1; 1295 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1296 if (bs>1 && !(n_vertices%bs)) { 1297 PetscBool is_blocked = PETSC_TRUE; 1298 PetscInt *vary; 1299 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1300 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1301 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1302 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1303 for (i=0; i<n_vertices/bs; i++) { 1304 if (vary[i]!=0 && vary[i]!=bs) { 1305 is_blocked = PETSC_FALSE; 1306 break; 1307 } 1308 } 1309 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1310 vbs = bs; 1311 for (i=0;i<n_R/vbs;i++) { 1312 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1313 } 1314 } 1315 ierr = PetscFree(vary);CHKERRQ(ierr); 1316 } 1317 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1318 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1319 1320 /* print some info if requested */ 1321 if (pcbddc->dbg_flag) { 1322 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1323 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1324 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1326 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 1329 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1330 } 1331 1332 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1333 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1334 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1335 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1336 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1337 for (i=0; i<n_D; i++) { 1338 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1339 } 1340 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1341 for (i=0, j=0; i<n_R; i++) { 1342 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1343 aux_array1[j++] = i; 1344 } 1345 } 1346 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1347 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1348 for (i=0, j=0; i<n_B; i++) { 1349 if (!PetscBTLookup(bitmask,is_indices[i])) { 1350 aux_array2[j++] = i; 1351 } 1352 } 1353 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1355 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1356 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1357 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1358 1359 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1360 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1361 for (i=0, j=0; i<n_R; i++) { 1362 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1363 aux_array1[j++] = i; 1364 } 1365 } 1366 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1367 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1368 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1369 } 1370 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1371 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1378 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1379 { 1380 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1381 PC_IS *pcis = (PC_IS*)pc->data; 1382 PC pc_temp; 1383 Mat A_RR; 1384 MatReuse reuse; 1385 PetscScalar m_one = -1.0; 1386 PetscReal value; 1387 PetscInt n_D,n_R,ibs,mbs; 1388 PetscBool use_exact,use_exact_reduced,issbaij; 1389 PetscErrorCode ierr; 1390 /* prefixes stuff */ 1391 char dir_prefix[256],neu_prefix[256],str_level[16]; 1392 size_t len; 1393 1394 PetscFunctionBegin; 1395 1396 /* compute prefixes */ 1397 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1398 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1399 if (!pcbddc->current_level) { 1400 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1401 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1402 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1403 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1404 } else { 1405 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1406 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1407 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1408 len -= 15; /* remove "pc_bddc_coarse_" */ 1409 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1410 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1411 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1412 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1413 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1414 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1415 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1416 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1417 } 1418 1419 /* DIRICHLET PROBLEM */ 1420 if (dirichlet) { 1421 if (pcbddc->issym) { 1422 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1423 } 1424 /* Matrix for Dirichlet problem is pcis->A_II */ 1425 n_D = pcis->n - pcis->n_B; 1426 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1427 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1428 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1429 /* default */ 1430 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1431 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1433 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1434 if (issbaij) { 1435 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1436 } else { 1437 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1438 } 1439 /* Allow user's customization */ 1440 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1441 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1442 } 1443 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1444 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1445 if (!n_D) { 1446 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1447 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1448 } 1449 /* Set Up KSP for Dirichlet problem of BDDC */ 1450 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1451 /* set ksp_D into pcis data */ 1452 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1453 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1454 pcis->ksp_D = pcbddc->ksp_D; 1455 } 1456 1457 /* NEUMANN PROBLEM */ 1458 A_RR = 0; 1459 if (neumann) { 1460 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1461 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1462 if (pcbddc->ksp_R) { /* already created ksp */ 1463 PetscInt nn_R; 1464 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1465 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1466 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1467 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1468 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1469 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1470 reuse = MAT_INITIAL_MATRIX; 1471 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1472 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1473 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1474 reuse = MAT_INITIAL_MATRIX; 1475 } else { /* safe to reuse the matrix */ 1476 reuse = MAT_REUSE_MATRIX; 1477 } 1478 } 1479 /* last check */ 1480 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1481 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1482 reuse = MAT_INITIAL_MATRIX; 1483 } 1484 } else { /* first time, so we need to create the matrix */ 1485 reuse = MAT_INITIAL_MATRIX; 1486 } 1487 /* extract A_RR */ 1488 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1489 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1490 if (ibs != mbs) { 1491 Mat newmat; 1492 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1493 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1494 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1495 } else { 1496 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1497 } 1498 if (pcbddc->issym) { 1499 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1500 } 1501 if (!pcbddc->ksp_R) { /* create object if not present */ 1502 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1503 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1504 /* default */ 1505 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1506 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1507 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1508 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1509 if (issbaij) { 1510 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1511 } else { 1512 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1513 } 1514 /* Allow user's customization */ 1515 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1516 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1517 } 1518 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1519 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1520 if (!n_R) { 1521 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1522 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1523 } 1524 /* Set Up KSP for Neumann problem of BDDC */ 1525 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1526 } 1527 1528 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1529 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1530 if (pcbddc->dbg_flag) { 1531 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1532 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1533 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1534 } 1535 if (dirichlet) { /* Dirichlet */ 1536 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1537 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1538 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1539 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1540 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1541 /* need to be adapted? */ 1542 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1543 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1544 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1545 /* print info */ 1546 if (pcbddc->dbg_flag) { 1547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1549 } 1550 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1551 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1552 } 1553 } 1554 if (neumann) { /* Neumann */ 1555 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1556 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1557 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1558 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1559 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1560 /* need to be adapted? */ 1561 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1562 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1563 /* print info */ 1564 if (pcbddc->dbg_flag) { 1565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1566 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1567 } 1568 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1569 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1570 } 1571 } 1572 } 1573 /* free Neumann problem's matrix */ 1574 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1580 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1581 { 1582 PetscErrorCode ierr; 1583 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1584 1585 PetscFunctionBegin; 1586 if (applytranspose) { 1587 if (pcbddc->local_auxmat1) { 1588 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1589 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1590 } 1591 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1592 } else { 1593 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1594 if (pcbddc->local_auxmat1) { 1595 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1596 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1597 } 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1605 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1606 { 1607 PetscErrorCode ierr; 1608 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1609 PC_IS* pcis = (PC_IS*) (pc->data); 1610 const PetscScalar zero = 0.0; 1611 1612 PetscFunctionBegin; 1613 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1614 if (applytranspose) { 1615 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1616 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1617 } else { 1618 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1619 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1620 } 1621 /* start communications from local primal nodes to rhs of coarse solver */ 1622 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1623 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 1626 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1627 /* TODO remove null space when doing multilevel */ 1628 if (pcbddc->coarse_ksp) { 1629 Vec rhs,sol; 1630 1631 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1632 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1633 if (applytranspose) { 1634 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1635 } else { 1636 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1637 } 1638 } 1639 1640 /* Local solution on R nodes */ 1641 if (pcis->n) { 1642 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1643 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1644 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1645 if (pcbddc->switch_static) { 1646 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1647 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1648 } 1649 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1650 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1651 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1652 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 if (pcbddc->switch_static) { 1654 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 } 1657 } 1658 1659 /* communications from coarse sol to local primal nodes */ 1660 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1661 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1662 1663 /* Sum contributions from two levels */ 1664 if (applytranspose) { 1665 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1666 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1667 } else { 1668 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1669 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1676 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1677 { 1678 PetscErrorCode ierr; 1679 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1680 PetscScalar *array; 1681 Vec from,to; 1682 1683 PetscFunctionBegin; 1684 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1685 from = pcbddc->coarse_vec; 1686 to = pcbddc->vec1_P; 1687 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1688 Vec tvec; 1689 1690 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1691 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1692 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1693 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1694 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1695 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1696 } 1697 } else { /* from local to global -> put data in coarse right hand side */ 1698 from = pcbddc->vec1_P; 1699 to = pcbddc->coarse_vec; 1700 } 1701 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1707 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1708 { 1709 PetscErrorCode ierr; 1710 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1711 PetscScalar *array; 1712 Vec from,to; 1713 1714 PetscFunctionBegin; 1715 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1716 from = pcbddc->coarse_vec; 1717 to = pcbddc->vec1_P; 1718 } else { /* from local to global -> put data in coarse right hand side */ 1719 from = pcbddc->vec1_P; 1720 to = pcbddc->coarse_vec; 1721 } 1722 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1723 if (smode == SCATTER_FORWARD) { 1724 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1725 Vec tvec; 1726 1727 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1728 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1729 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1730 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1731 } 1732 } else { 1733 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1734 ierr = VecResetArray(from);CHKERRQ(ierr); 1735 } 1736 } 1737 PetscFunctionReturn(0); 1738 } 1739 1740 /* uncomment for testing purposes */ 1741 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1744 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1745 { 1746 PetscErrorCode ierr; 1747 PC_IS* pcis = (PC_IS*)(pc->data); 1748 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1749 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1750 /* one and zero */ 1751 PetscScalar one=1.0,zero=0.0; 1752 /* space to store constraints and their local indices */ 1753 PetscScalar *temp_quadrature_constraint; 1754 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1755 /* iterators */ 1756 PetscInt i,j,k,total_counts,temp_start_ptr; 1757 /* BLAS integers */ 1758 PetscBLASInt lwork,lierr; 1759 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1760 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1761 /* reuse */ 1762 PetscInt olocal_primal_size; 1763 PetscInt *oprimal_indices_local_idxs; 1764 /* change of basis */ 1765 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1766 PetscBool boolforchange,qr_needed; 1767 PetscBT touched,change_basis,qr_needed_idx; 1768 /* auxiliary stuff */ 1769 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1770 PetscInt ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL; 1771 PetscScalar *temp_quadrature_constraint_work=NULL; 1772 /* some quantities */ 1773 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1774 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1775 1776 PetscFunctionBegin; 1777 /* Destroy Mat objects computed previously */ 1778 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1779 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1780 1781 /* print some info */ 1782 if (pcbddc->dbg_flag) { 1783 IS vertices; 1784 PetscInt nv,nedges,nfaces; 1785 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1786 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1787 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1788 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1789 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1790 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1791 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1792 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1793 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1794 } 1795 1796 if (!pcbddc->adaptive_selection) { 1797 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1798 MatNullSpace nearnullsp; 1799 const Vec *nearnullvecs; 1800 Vec *localnearnullsp; 1801 PetscScalar *array; 1802 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1803 PetscBool nnsp_has_cnst; 1804 /* LAPACK working arrays for SVD or POD */ 1805 PetscBool skip_lapack; 1806 PetscScalar *work; 1807 PetscReal *singular_vals; 1808 #if defined(PETSC_USE_COMPLEX) 1809 PetscReal *rwork; 1810 #endif 1811 #if defined(PETSC_MISSING_LAPACK_GESVD) 1812 PetscScalar *temp_basis,*correlation_mat; 1813 #else 1814 PetscBLASInt dummy_int=1; 1815 PetscScalar dummy_scalar=1.; 1816 #endif 1817 1818 /* Get index sets for faces, edges and vertices from graph */ 1819 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1820 /* free unneeded index sets */ 1821 if (!pcbddc->use_vertices) { 1822 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1823 } 1824 if (!pcbddc->use_edges) { 1825 for (i=0;i<n_ISForEdges;i++) { 1826 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1827 } 1828 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1829 n_ISForEdges = 0; 1830 } 1831 if (!pcbddc->use_faces) { 1832 for (i=0;i<n_ISForFaces;i++) { 1833 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1834 } 1835 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1836 n_ISForFaces = 0; 1837 } 1838 /* HACKS (the following two blocks of code) */ 1839 if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) { 1840 pcbddc->use_change_of_basis = PETSC_TRUE; 1841 if (!ISForEdges) { 1842 pcbddc->use_change_on_faces = PETSC_TRUE; 1843 } 1844 } 1845 if (pcbddc->NullSpace) { 1846 /* use_change_of_basis should be consistent among processors */ 1847 PetscBool tbool[2],gbool[2]; 1848 tbool[0] = pcbddc->use_change_of_basis; 1849 tbool[1] = pcbddc->use_change_on_faces; 1850 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1851 pcbddc->use_change_of_basis = gbool[0]; 1852 pcbddc->use_change_on_faces = gbool[1]; 1853 } 1854 1855 /* check if near null space is attached to global mat */ 1856 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1857 if (nearnullsp) { 1858 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1859 /* remove any stored info */ 1860 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1861 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1862 /* store information for BDDC solver reuse */ 1863 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1864 pcbddc->onearnullspace = nearnullsp; 1865 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1866 for (i=0;i<nnsp_size;i++) { 1867 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1868 } 1869 } else { /* if near null space is not provided BDDC uses constants by default */ 1870 nnsp_size = 0; 1871 nnsp_has_cnst = PETSC_TRUE; 1872 } 1873 /* get max number of constraints on a single cc */ 1874 max_constraints = nnsp_size; 1875 if (nnsp_has_cnst) max_constraints++; 1876 1877 /* 1878 Evaluate maximum storage size needed by the procedure 1879 - temp_indices will contain start index of each constraint stored as follows 1880 - temp_indices_to_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1881 - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1882 - temp_quadrature_constraint [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself 1883 */ 1884 total_counts = n_ISForFaces+n_ISForEdges; 1885 total_counts *= max_constraints; 1886 n_vertices = 0; 1887 if (ISForVertices) { 1888 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1889 } 1890 total_counts += n_vertices; 1891 ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr); 1892 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1893 total_counts = 0; 1894 max_size_of_constraint = 0; 1895 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1896 if (i<n_ISForEdges) { 1897 used_IS = &ISForEdges[i]; 1898 } else { 1899 used_IS = &ISForFaces[i-n_ISForEdges]; 1900 } 1901 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1902 total_counts += j; 1903 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1904 } 1905 total_counts *= max_constraints; 1906 total_counts += n_vertices; 1907 ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr); 1908 /* get local part of global near null space vectors */ 1909 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1910 for (k=0;k<nnsp_size;k++) { 1911 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1912 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1913 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1914 } 1915 1916 /* whether or not to skip lapack calls */ 1917 skip_lapack = PETSC_TRUE; 1918 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1919 1920 /* allocate some auxiliary stuff */ 1921 if (!skip_lapack || pcbddc->use_qr_single) { 1922 ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr); 1923 } else { 1924 gidxs = NULL; 1925 permutation = NULL; 1926 temp_indices_to_constraint_work = NULL; 1927 temp_quadrature_constraint_work = NULL; 1928 } 1929 1930 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1931 if (!skip_lapack) { 1932 PetscScalar temp_work; 1933 1934 #if defined(PETSC_MISSING_LAPACK_GESVD) 1935 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1936 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1937 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1938 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1939 #if defined(PETSC_USE_COMPLEX) 1940 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1941 #endif 1942 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1943 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1944 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1945 lwork = -1; 1946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1947 #if !defined(PETSC_USE_COMPLEX) 1948 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1949 #else 1950 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1951 #endif 1952 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1953 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1954 #else /* on missing GESVD */ 1955 /* SVD */ 1956 PetscInt max_n,min_n; 1957 max_n = max_size_of_constraint; 1958 min_n = max_constraints; 1959 if (max_size_of_constraint < max_constraints) { 1960 min_n = max_size_of_constraint; 1961 max_n = max_constraints; 1962 } 1963 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1964 #if defined(PETSC_USE_COMPLEX) 1965 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1966 #endif 1967 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1968 lwork = -1; 1969 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1970 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1971 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1972 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1973 #if !defined(PETSC_USE_COMPLEX) 1974 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1975 #else 1976 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1977 #endif 1978 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1979 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1980 #endif /* on missing GESVD */ 1981 /* Allocate optimal workspace */ 1982 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1983 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1984 } 1985 /* Now we can loop on constraining sets */ 1986 total_counts = 0; 1987 temp_indices[0] = 0; 1988 /* vertices */ 1989 if (ISForVertices) { 1990 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1991 if (nnsp_has_cnst) { /* consider all vertices */ 1992 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1993 for (i=0;i<n_vertices;i++) { 1994 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1995 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1996 total_counts++; 1997 } 1998 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1999 PetscBool used_vertex; 2000 for (i=0;i<n_vertices;i++) { 2001 used_vertex = PETSC_FALSE; 2002 k = 0; 2003 while (!used_vertex && k<nnsp_size) { 2004 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2005 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2006 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 2007 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 2008 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 2009 total_counts++; 2010 used_vertex = PETSC_TRUE; 2011 } 2012 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2013 k++; 2014 } 2015 } 2016 } 2017 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2018 n_vertices = total_counts; 2019 } 2020 2021 /* edges and faces */ 2022 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2023 if (ncc<n_ISForEdges) { 2024 used_IS = &ISForEdges[ncc]; 2025 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2026 } else { 2027 used_IS = &ISForFaces[ncc-n_ISForEdges]; 2028 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2029 } 2030 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2031 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 2032 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 2033 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2034 /* change of basis should not be performed on local periodic nodes */ 2035 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2036 if (nnsp_has_cnst) { 2037 PetscScalar quad_value; 2038 temp_constraints++; 2039 if (!pcbddc->use_nnsp_true) { 2040 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2041 } else { 2042 quad_value = 1.0; 2043 } 2044 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2045 for (j=0;j<size_of_constraint;j++) { 2046 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 2047 } 2048 /* sort by global ordering if using lapack subroutines (not needed!) */ 2049 if (!skip_lapack || pcbddc->use_qr_single) { 2050 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2051 for (j=0;j<size_of_constraint;j++) { 2052 permutation[j]=j; 2053 } 2054 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2055 for (j=0;j<size_of_constraint;j++) { 2056 if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen"); 2057 } 2058 for (j=0;j<size_of_constraint;j++) { 2059 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2060 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2061 } 2062 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2063 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2064 } 2065 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2066 total_counts++; 2067 } 2068 for (k=0;k<nnsp_size;k++) { 2069 PetscReal real_value; 2070 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2071 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2072 for (j=0;j<size_of_constraint;j++) { 2073 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 2074 } 2075 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2076 /* check if array is null on the connected component */ 2077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2078 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 2079 if (real_value > 0.0) { /* keep indices and values */ 2080 /* sort by global ordering if using lapack subroutines */ 2081 if (!skip_lapack || pcbddc->use_qr_single) { 2082 ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr); 2083 for (j=0;j<size_of_constraint;j++) { 2084 permutation[j]=j; 2085 } 2086 ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr); 2087 for (j=0;j<size_of_constraint;j++) { 2088 temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]]; 2089 temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]]; 2090 } 2091 ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2092 ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr); 2093 } 2094 temp_constraints++; 2095 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 2096 total_counts++; 2097 } 2098 } 2099 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2100 valid_constraints = temp_constraints; 2101 if (!pcbddc->use_nnsp_true && temp_constraints) { 2102 if (temp_constraints == 1) { /* just normalize the constraint */ 2103 PetscScalar norm; 2104 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2105 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2106 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2107 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one)); 2108 } else { /* perform SVD */ 2109 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2110 2111 #if defined(PETSC_MISSING_LAPACK_GESVD) 2112 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2113 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2114 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2115 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2116 from that computed using LAPACKgesvd 2117 -> This is due to a different computation of eigenvectors in LAPACKheev 2118 -> The quality of the POD-computed basis will be the same */ 2119 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2120 /* Store upper triangular part of correlation matrix */ 2121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2122 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2123 for (j=0;j<temp_constraints;j++) { 2124 for (k=0;k<j+1;k++) { 2125 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one)); 2126 } 2127 } 2128 /* compute eigenvalues and eigenvectors of correlation matrix */ 2129 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2130 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2131 #if !defined(PETSC_USE_COMPLEX) 2132 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2133 #else 2134 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2135 #endif 2136 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2137 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2138 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2139 j = 0; 2140 while (j < temp_constraints && singular_vals[j] < tol) j++; 2141 total_counts = total_counts-j; 2142 valid_constraints = temp_constraints-j; 2143 /* scale and copy POD basis into used quadrature memory */ 2144 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2145 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2150 if (j<temp_constraints) { 2151 PetscInt ii; 2152 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 2153 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2154 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2155 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2156 for (k=0;k<temp_constraints-j;k++) { 2157 for (ii=0;ii<size_of_constraint;ii++) { 2158 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2159 } 2160 } 2161 } 2162 #else /* on missing GESVD */ 2163 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2164 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2166 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2167 #if !defined(PETSC_USE_COMPLEX) 2168 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2169 #else 2170 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2171 #endif 2172 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2173 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2174 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2175 k = temp_constraints; 2176 if (k > size_of_constraint) k = size_of_constraint; 2177 j = 0; 2178 while (j < k && singular_vals[k-j-1] < tol) j++; 2179 valid_constraints = k-j; 2180 total_counts = total_counts-temp_constraints+valid_constraints; 2181 #endif /* on missing GESVD */ 2182 } 2183 } 2184 /* setting change_of_basis flag is safe now */ 2185 if (boolforchange) { 2186 for (j=0;j<valid_constraints;j++) { 2187 PetscBTSet(change_basis,total_counts-j-1); 2188 } 2189 } 2190 } 2191 /* free workspace */ 2192 if (!skip_lapack || pcbddc->use_qr_single) { 2193 ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr); 2194 } 2195 if (!skip_lapack) { 2196 ierr = PetscFree(work);CHKERRQ(ierr); 2197 #if defined(PETSC_USE_COMPLEX) 2198 ierr = PetscFree(rwork);CHKERRQ(ierr); 2199 #endif 2200 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2201 #if defined(PETSC_MISSING_LAPACK_GESVD) 2202 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2203 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2204 #endif 2205 } 2206 for (k=0;k<nnsp_size;k++) { 2207 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2208 } 2209 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2210 /* free index sets of faces, edges and vertices */ 2211 for (i=0;i<n_ISForFaces;i++) { 2212 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2213 } 2214 if (n_ISForFaces) { 2215 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2216 } 2217 for (i=0;i<n_ISForEdges;i++) { 2218 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2219 } 2220 if (n_ISForEdges) { 2221 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2222 } 2223 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2224 } else { 2225 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2226 PetscInt cum = 0; 2227 2228 total_counts = 0; 2229 n_vertices = 0; 2230 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2231 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2232 } 2233 max_constraints = 0; 2234 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2235 total_counts += pcbddc->adaptive_constraints_n[i]; 2236 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2237 } 2238 temp_indices = pcbddc->adaptive_constraints_ptrs; 2239 temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs; 2240 temp_quadrature_constraint = pcbddc->adaptive_constraints_data; 2241 2242 #if 0 2243 printf("Found %d totals\n",total_counts); 2244 for (i=0;i<total_counts;i++) { 2245 printf("const %d, start %d",i,temp_indices[i]); 2246 printf(" end %d:\n",temp_indices[i+1]); 2247 for (j=temp_indices[i];j<temp_indices[i+1];j++) { 2248 printf(" idxs %d",temp_indices_to_constraint[j]); 2249 printf(" data %1.2e\n",temp_quadrature_constraint[j]); 2250 } 2251 } 2252 for (i=0;i<n_vertices;i++) { 2253 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2254 } 2255 for (i=0;i<sub_schurs->n_subs;i++) { 2256 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2257 } 2258 #endif 2259 2260 max_size_of_constraint = 0; 2261 for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]); 2262 ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr); 2263 /* Change of basis */ 2264 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2265 if (pcbddc->use_change_of_basis) { 2266 cum = n_vertices; 2267 for (i=0;i<sub_schurs->n_subs;i++) { 2268 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2269 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2270 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2271 } 2272 } 2273 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2274 } 2275 } 2276 } 2277 2278 /* map temp_indices_to_constraint in boundary numbering */ 2279 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 2280 if (i != temp_indices[total_counts]) { 2281 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 2282 } 2283 2284 /* set quantities in pcbddc data structure and store previous primal size */ 2285 /* n_vertices defines the number of subdomain corners in the primal space */ 2286 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 2287 olocal_primal_size = pcbddc->local_primal_size; 2288 pcbddc->local_primal_size = total_counts; 2289 pcbddc->n_vertices = n_vertices; 2290 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 2291 2292 /* Create constraint matrix */ 2293 /* The constraint matrix is used to compute the l2g map of primal dofs */ 2294 /* so we need to set it up properly either with or without change of basis */ 2295 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2296 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2297 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2298 /* array to compute a local numbering of constraints : vertices first then constraints */ 2299 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr); 2300 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 2301 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 2302 ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr); 2303 /* auxiliary stuff for basis change */ 2304 ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr); 2305 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 2306 2307 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2308 total_primal_vertices=0; 2309 for (i=0;i<pcbddc->local_primal_size;i++) { 2310 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2311 if (size_of_constraint == 1) { 2312 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 2313 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 2314 aux_primal_minloc[total_primal_vertices]=0; 2315 total_primal_vertices++; 2316 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 2317 PetscInt min_loc,min_index; 2318 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 2319 /* find first untouched local node */ 2320 k = 0; 2321 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 2322 min_index = global_indices[k]; 2323 min_loc = k; 2324 /* search the minimum among global nodes already untouched on the cc */ 2325 for (k=1;k<size_of_constraint;k++) { 2326 /* there can be more than one constraint on a single connected component */ 2327 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 2328 min_index = global_indices[k]; 2329 min_loc = k; 2330 } 2331 } 2332 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 2333 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 2334 aux_primal_minloc[total_primal_vertices]=min_loc; 2335 total_primal_vertices++; 2336 } 2337 } 2338 /* determine if a QR strategy is needed for change of basis */ 2339 qr_needed = PETSC_FALSE; 2340 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 2341 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2342 if (PetscBTLookup(change_basis,i)) { 2343 if (!pcbddc->use_qr_single) { 2344 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2345 j = 0; 2346 for (k=0;k<size_of_constraint;k++) { 2347 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 2348 j++; 2349 } 2350 } 2351 /* found more than one primal dof on the cc */ 2352 if (j > 1) { 2353 PetscBTSet(qr_needed_idx,i); 2354 qr_needed = PETSC_TRUE; 2355 } 2356 } else { 2357 PetscBTSet(qr_needed_idx,i); 2358 qr_needed = PETSC_TRUE; 2359 } 2360 } 2361 } 2362 /* free workspace */ 2363 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2364 2365 /* permute indices in order to have a sorted set of vertices */ 2366 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 2367 2368 /* nonzero structure of constraint matrix */ 2369 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2370 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2371 j=total_primal_vertices; 2372 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2373 if (!PetscBTLookup(change_basis,i)) { 2374 nnz[j]=temp_indices[i+1]-temp_indices[i]; 2375 j++; 2376 } 2377 } 2378 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2379 ierr = PetscFree(nnz);CHKERRQ(ierr); 2380 /* set values in constraint matrix */ 2381 for (i=0;i<total_primal_vertices;i++) { 2382 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2383 } 2384 total_counts = total_primal_vertices; 2385 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2386 if (!PetscBTLookup(change_basis,i)) { 2387 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 2388 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 2389 total_counts++; 2390 } 2391 } 2392 /* assembling */ 2393 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2394 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2395 /* 2396 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2397 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2398 */ 2399 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2400 if (pcbddc->use_change_of_basis) { 2401 /* dual and primal dofs on a single cc */ 2402 PetscInt dual_dofs,primal_dofs; 2403 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 2404 PetscInt primal_counter; 2405 /* working stuff for GEQRF */ 2406 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2407 PetscBLASInt lqr_work; 2408 /* working stuff for UNGQR */ 2409 PetscScalar *gqr_work,lgqr_work_t; 2410 PetscBLASInt lgqr_work; 2411 /* working stuff for TRTRS */ 2412 PetscScalar *trs_rhs; 2413 PetscBLASInt Blas_NRHS; 2414 /* pointers for values insertion into change of basis matrix */ 2415 PetscInt *start_rows,*start_cols; 2416 PetscScalar *start_vals; 2417 /* working stuff for values insertion */ 2418 PetscBT is_primal; 2419 /* matrix sizes */ 2420 PetscInt global_size,local_size; 2421 /* temporary change of basis */ 2422 Mat localChangeOfBasisMatrix; 2423 /* extra space for debugging */ 2424 PetscScalar *dbg_work; 2425 2426 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2427 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2428 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2429 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2430 /* nonzeros for local mat */ 2431 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2432 for (i=0;i<pcis->n;i++) nnz[i]=1; 2433 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 2434 if (PetscBTLookup(change_basis,i)) { 2435 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 2436 if (PetscBTLookup(qr_needed_idx,i)) { 2437 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2438 } else { 2439 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2; 2440 /* get local primal index on the cc */ 2441 j = 0; 2442 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 2443 nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint; 2444 } 2445 } 2446 } 2447 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2448 ierr = PetscFree(nnz);CHKERRQ(ierr); 2449 /* Set initial identity in the matrix */ 2450 for (i=0;i<pcis->n;i++) { 2451 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2452 } 2453 2454 if (pcbddc->dbg_flag) { 2455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2456 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2457 } 2458 2459 2460 /* Now we loop on the constraints which need a change of basis */ 2461 /* 2462 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2463 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2464 2465 Basic blocks of change of basis matrix T computed by 2466 2467 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2468 2469 | 1 0 ... 0 s_1/S | 2470 | 0 1 ... 0 s_2/S | 2471 | ... | 2472 | 0 ... 1 s_{n-1}/S | 2473 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2474 2475 with S = \sum_{i=1}^n s_i^2 2476 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2477 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2478 2479 - QR decomposition of constraints otherwise 2480 */ 2481 if (qr_needed) { 2482 /* space to store Q */ 2483 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2484 /* first we issue queries for optimal work */ 2485 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2486 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2487 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2488 lqr_work = -1; 2489 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2490 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2491 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2492 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2493 lgqr_work = -1; 2494 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2495 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2496 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2497 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2498 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2499 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2500 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2501 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2502 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2503 /* array to store scaling factors for reflectors */ 2504 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2505 /* array to store rhs and solution of triangular solver */ 2506 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2507 /* allocating workspace for check */ 2508 if (pcbddc->dbg_flag) { 2509 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2510 } 2511 } 2512 /* array to store whether a node is primal or not */ 2513 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2514 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2515 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2516 if (i != total_primal_vertices) { 2517 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2518 } 2519 for (i=0;i<total_primal_vertices;i++) { 2520 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2521 } 2522 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2523 2524 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2525 /* -> using implicit ordering contained in temp_indices data */ 2526 total_counts = pcbddc->n_vertices; 2527 primal_counter = total_counts; 2528 while (total_counts<pcbddc->local_primal_size) { 2529 primal_dofs = 1; 2530 if (PetscBTLookup(change_basis,total_counts)) { 2531 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 2532 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) { 2533 primal_dofs++; 2534 } 2535 /* get constraint info */ 2536 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 2537 dual_dofs = size_of_constraint-primal_dofs; 2538 2539 if (pcbddc->dbg_flag) { 2540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 2541 } 2542 2543 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2544 2545 /* copy quadrature constraints for change of basis check */ 2546 if (pcbddc->dbg_flag) { 2547 ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2548 } 2549 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2550 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2551 2552 /* compute QR decomposition of constraints */ 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2556 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2557 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2560 2561 /* explictly compute R^-T */ 2562 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2563 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2564 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2565 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2569 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2570 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2571 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2572 2573 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2576 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2578 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2579 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2580 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2582 2583 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2584 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2585 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2586 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2587 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2588 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2589 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2590 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2592 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2593 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 2594 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2595 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2596 2597 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2598 start_rows = &temp_indices_to_constraint[temp_indices[total_counts]]; 2599 /* insert cols for primal dofs */ 2600 for (j=0;j<primal_dofs;j++) { 2601 start_vals = &qr_basis[j*size_of_constraint]; 2602 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 2603 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2604 } 2605 /* insert cols for dual dofs */ 2606 for (j=0,k=0;j<dual_dofs;k++) { 2607 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 2608 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2609 start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k]; 2610 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2611 j++; 2612 } 2613 } 2614 2615 /* check change of basis */ 2616 if (pcbddc->dbg_flag) { 2617 PetscInt ii,jj; 2618 PetscBool valid_qr=PETSC_TRUE; 2619 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2620 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2623 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2624 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2626 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2628 for (jj=0;jj<size_of_constraint;jj++) { 2629 for (ii=0;ii<primal_dofs;ii++) { 2630 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2631 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2632 } 2633 } 2634 if (!valid_qr) { 2635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2636 for (jj=0;jj<size_of_constraint;jj++) { 2637 for (ii=0;ii<primal_dofs;ii++) { 2638 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2639 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2640 } 2641 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2642 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2643 } 2644 } 2645 } 2646 } else { 2647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2648 } 2649 } 2650 } else { /* simple transformation block */ 2651 PetscInt row,col; 2652 PetscScalar val,norm; 2653 2654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2655 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one)); 2656 for (j=0;j<size_of_constraint;j++) { 2657 PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2658 row = temp_indices_to_constraint[temp_indices[total_counts]+j]; 2659 if (!PetscBTLookup(is_primal,row_B)) { 2660 col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2661 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2662 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2663 } else { 2664 for (k=0;k<size_of_constraint;k++) { 2665 col = temp_indices_to_constraint[temp_indices[total_counts]+k]; 2666 if (row != col) { 2667 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2668 } else { 2669 val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm; 2670 } 2671 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2672 } 2673 } 2674 } 2675 if (pcbddc->dbg_flag) { 2676 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2677 } 2678 } 2679 /* increment primal counter */ 2680 primal_counter += primal_dofs; 2681 } else { 2682 if (pcbddc->dbg_flag) { 2683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2684 } 2685 } 2686 /* increment constraint counter total_counts */ 2687 total_counts += primal_dofs; 2688 } 2689 2690 /* free workspace */ 2691 if (qr_needed) { 2692 if (pcbddc->dbg_flag) { 2693 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2694 } 2695 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2696 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2697 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2698 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2699 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2700 } 2701 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2702 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2703 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2704 2705 /* assembling of global change of variable */ 2706 { 2707 Mat tmat; 2708 PetscInt bs; 2709 2710 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2711 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2712 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2713 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2714 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2715 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2716 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2717 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2718 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2719 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2720 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2721 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2722 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2723 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2724 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2725 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2726 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2727 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2728 } 2729 /* check */ 2730 if (pcbddc->dbg_flag) { 2731 PetscReal error; 2732 Vec x,x_change; 2733 2734 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2735 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2736 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2737 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2738 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2740 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2741 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2742 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2743 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2744 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2745 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2747 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2748 ierr = VecDestroy(&x);CHKERRQ(ierr); 2749 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2750 } 2751 2752 /* adapt sub_schurs computed (if any) */ 2753 if (pcbddc->use_deluxe_scaling) { 2754 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2755 if (sub_schurs->S_Ej_all) { 2756 Mat S_1,S_2,tmat; 2757 IS is_all_N; 2758 2759 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2760 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2761 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2762 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr); 2763 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2764 sub_schurs->S_Ej_all = S_1; 2765 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr); 2766 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2767 sub_schurs->sum_S_Ej_all = S_2; 2768 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2769 } 2770 } 2771 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2772 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2773 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2774 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2775 } 2776 2777 /* set up change of basis context */ 2778 if (pcbddc->ChangeOfBasisMatrix) { 2779 PCBDDCChange_ctx change_ctx; 2780 2781 if (!pcbddc->new_global_mat) { 2782 PetscInt global_size,local_size; 2783 2784 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2785 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2786 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2787 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2788 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2789 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2790 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2791 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2792 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2793 } else { 2794 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2795 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2796 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2797 } 2798 if (!pcbddc->user_ChangeOfBasisMatrix) { 2799 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2800 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2801 } else { 2802 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2803 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2804 } 2805 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2806 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2807 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2808 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2809 } 2810 2811 /* get indices in local ordering for vertices and constraints */ 2812 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2813 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 2814 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2815 } 2816 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2817 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2818 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2819 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2820 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2821 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2822 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2823 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2824 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2825 /* set quantities in PCBDDC data struct */ 2826 pcbddc->n_actual_vertices = i; 2827 /* check if a new primal space has been introduced */ 2828 pcbddc->new_primal_space_local = PETSC_TRUE; 2829 if (olocal_primal_size == pcbddc->local_primal_size) { 2830 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2831 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2832 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2833 } 2834 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2835 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2836 2837 /* flush dbg viewer */ 2838 if (pcbddc->dbg_flag) { 2839 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2840 } 2841 2842 /* free workspace */ 2843 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2844 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2845 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2846 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2847 if (!pcbddc->adaptive_selection) { 2848 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2849 ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr); 2850 } else { 2851 ierr = PetscFree4(pcbddc->adaptive_constraints_n, 2852 pcbddc->adaptive_constraints_ptrs, 2853 pcbddc->adaptive_constraints_idxs, 2854 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2855 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2856 } 2857 PetscFunctionReturn(0); 2858 } 2859 2860 #undef __FUNCT__ 2861 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2862 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2863 { 2864 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2865 PC_IS *pcis = (PC_IS*)pc->data; 2866 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2867 PetscInt ierr,i,vertex_size; 2868 PetscViewer viewer=pcbddc->dbg_viewer; 2869 2870 PetscFunctionBegin; 2871 /* Reset previously computed graph */ 2872 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2873 /* Init local Graph struct */ 2874 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2875 2876 /* Check validity of the csr graph passed in by the user */ 2877 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2878 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2879 } 2880 2881 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2882 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2883 PetscInt *xadj,*adjncy; 2884 PetscInt nvtxs; 2885 PetscBool flg_row=PETSC_FALSE; 2886 2887 if (pcbddc->use_local_adj) { 2888 2889 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2890 if (flg_row) { 2891 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2892 pcbddc->computed_rowadj = PETSC_TRUE; 2893 } 2894 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2895 } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */ 2896 IS is_dummy; 2897 ISLocalToGlobalMapping l2gmap_dummy; 2898 PetscInt j,sum; 2899 PetscInt *cxadj,*cadjncy; 2900 const PetscInt *idxs; 2901 PCBDDCGraph graph; 2902 PetscBT is_on_boundary; 2903 2904 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2905 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2906 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2907 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2908 ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr); 2909 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2910 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2911 if (flg_row) { 2912 graph->xadj = xadj; 2913 graph->adjncy = adjncy; 2914 } 2915 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2916 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2917 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2918 2919 if (pcbddc->dbg_flag) { 2920 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr); 2921 for (i=0;i<graph->ncc;i++) { 2922 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2923 } 2924 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2925 } 2926 2927 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2928 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2929 for (i=0;i<pcis->n_B;i++) { 2930 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2931 } 2932 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2933 2934 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2935 sum = 0; 2936 for (i=0;i<graph->ncc;i++) { 2937 PetscInt sizecc = 0; 2938 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2939 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2940 sizecc++; 2941 } 2942 } 2943 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2944 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2945 cxadj[graph->queue[j]] = sizecc; 2946 } 2947 } 2948 sum += sizecc*sizecc; 2949 } 2950 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2951 sum = 0; 2952 for (i=0;i<pcis->n;i++) { 2953 PetscInt temp = cxadj[i]; 2954 cxadj[i] = sum; 2955 sum += temp; 2956 } 2957 cxadj[pcis->n] = sum; 2958 for (i=0;i<graph->ncc;i++) { 2959 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2960 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2961 PetscInt k,sizecc = 0; 2962 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2963 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2964 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2965 sizecc++; 2966 } 2967 } 2968 } 2969 } 2970 } 2971 if (pcis->n) { 2972 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2973 } else { 2974 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2975 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2976 } 2977 graph->xadj = 0; 2978 graph->adjncy = 0; 2979 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2980 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2981 } 2982 } 2983 2984 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2985 vertex_size = 1; 2986 if (pcbddc->user_provided_isfordofs) { 2987 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2988 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2989 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2990 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2991 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2992 } 2993 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2994 pcbddc->n_ISForDofs = 0; 2995 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2996 } 2997 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2998 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2999 } else { 3000 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3001 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3002 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3003 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3004 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3005 } 3006 } 3007 } 3008 3009 /* Setup of Graph */ 3010 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3011 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3012 } 3013 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3014 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3015 } 3016 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices); 3017 3018 /* Graph's connected components analysis */ 3019 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3020 3021 /* print some info to stdout */ 3022 if (pcbddc->dbg_flag) { 3023 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 3024 } 3025 3026 /* mark topography has done */ 3027 pcbddc->recompute_topography = PETSC_FALSE; 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 3033 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 3034 { 3035 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3036 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 3037 PetscErrorCode ierr; 3038 3039 PetscFunctionBegin; 3040 n = 0; 3041 vertices = 0; 3042 if (pcbddc->ConstraintMatrix) { 3043 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 3044 for (i=0;i<local_primal_size;i++) { 3045 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3046 if (size_of_constraint == 1) n++; 3047 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3048 } 3049 if (vertices_idx) { 3050 ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr); 3051 n = 0; 3052 for (i=0;i<local_primal_size;i++) { 3053 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3054 if (size_of_constraint == 1) { 3055 vertices[n++]=row_cmat_indices[0]; 3056 } 3057 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3058 } 3059 } 3060 } 3061 *n_vertices = n; 3062 if (vertices_idx) *vertices_idx = vertices; 3063 PetscFunctionReturn(0); 3064 } 3065 3066 #undef __FUNCT__ 3067 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 3068 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 3069 { 3070 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 3071 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 3072 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 3073 PetscBT touched; 3074 PetscErrorCode ierr; 3075 3076 /* This function assumes that the number of local constraints per connected component 3077 is not greater than the number of nodes defined for the connected component 3078 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 3079 PetscFunctionBegin; 3080 n = 0; 3081 constraints_index = 0; 3082 if (pcbddc->ConstraintMatrix) { 3083 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 3084 max_size_of_constraint = 0; 3085 for (i=0;i<local_primal_size;i++) { 3086 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3087 if (size_of_constraint > 1) { 3088 n++; 3089 } 3090 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 3091 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 3092 } 3093 if (constraints_idx) { 3094 ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr); 3095 ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr); 3096 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 3097 n = 0; 3098 for (i=0;i<local_primal_size;i++) { 3099 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3100 if (size_of_constraint > 1) { 3101 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 3102 /* find first untouched local node */ 3103 j = 0; 3104 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 3105 min_index = row_cmat_global_indices[j]; 3106 min_loc = j; 3107 /* search the minimum among nodes not yet touched on the connected component 3108 since there can be more than one constraint on a single cc */ 3109 for (j=1;j<size_of_constraint;j++) { 3110 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 3111 min_index = row_cmat_global_indices[j]; 3112 min_loc = j; 3113 } 3114 } 3115 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 3116 constraints_index[n++] = row_cmat_indices[min_loc]; 3117 } 3118 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 3119 } 3120 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 3121 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 3122 } 3123 } 3124 *n_constraints = n; 3125 if (constraints_idx) *constraints_idx = constraints_index; 3126 PetscFunctionReturn(0); 3127 } 3128 3129 #undef __FUNCT__ 3130 #define __FUNCT__ "PCBDDCSubsetNumbering" 3131 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3132 { 3133 Vec local_vec,global_vec; 3134 IS seqis,paris; 3135 VecScatter scatter_ctx; 3136 PetscScalar *array; 3137 PetscInt *temp_global_dofs; 3138 PetscScalar globalsum; 3139 PetscInt i,j,s; 3140 PetscInt nlocals,first_index,old_index,max_local,max_global; 3141 PetscMPIInt rank_prec_comm,size_prec_comm; 3142 PetscInt *dof_sizes,*dof_displs; 3143 PetscBool first_found; 3144 PetscErrorCode ierr; 3145 3146 PetscFunctionBegin; 3147 /* mpi buffers */ 3148 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3149 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3150 j = ( !rank_prec_comm ? size_prec_comm : 0); 3151 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3152 /* get maximum size of subset */ 3153 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3154 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3155 max_local = 0; 3156 for (i=0;i<n_local_dofs;i++) { 3157 if (max_local < temp_global_dofs[i] ) { 3158 max_local = temp_global_dofs[i]; 3159 } 3160 } 3161 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3162 max_global++; 3163 max_local = 0; 3164 for (i=0;i<n_local_dofs;i++) { 3165 if (max_local < local_dofs[i] ) { 3166 max_local = local_dofs[i]; 3167 } 3168 } 3169 max_local++; 3170 /* allocate workspace */ 3171 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3172 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3173 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3174 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3175 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3176 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3177 /* create scatter */ 3178 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3179 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3180 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3181 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3182 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3183 /* init array */ 3184 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3185 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3186 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3187 if (local_dofs_mult) { 3188 for (i=0;i<n_local_dofs;i++) { 3189 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3190 } 3191 } else { 3192 for (i=0;i<n_local_dofs;i++) { 3193 array[local_dofs[i]]=1.0; 3194 } 3195 } 3196 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3197 /* scatter into global vec and get total number of global dofs */ 3198 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3199 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3200 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3201 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3202 /* Fill global_vec with cumulative function for global numbering */ 3203 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3204 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3205 nlocals = 0; 3206 first_index = -1; 3207 first_found = PETSC_FALSE; 3208 for (i=0;i<s;i++) { 3209 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3210 first_found = PETSC_TRUE; 3211 first_index = i; 3212 } 3213 nlocals += (PetscInt)PetscRealPart(array[i]); 3214 } 3215 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3216 if (!rank_prec_comm) { 3217 dof_displs[0]=0; 3218 for (i=1;i<size_prec_comm;i++) { 3219 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3220 } 3221 } 3222 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3223 if (first_found) { 3224 array[first_index] += (PetscScalar)nlocals; 3225 old_index = first_index; 3226 for (i=first_index+1;i<s;i++) { 3227 if (PetscRealPart(array[i]) > 0.1) { 3228 array[i] += array[old_index]; 3229 old_index = i; 3230 } 3231 } 3232 } 3233 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3234 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3235 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3236 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3237 /* get global ordering of local dofs */ 3238 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3239 if (local_dofs_mult) { 3240 for (i=0;i<n_local_dofs;i++) { 3241 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3242 } 3243 } else { 3244 for (i=0;i<n_local_dofs;i++) { 3245 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3246 } 3247 } 3248 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3249 /* free workspace */ 3250 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3251 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3252 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3253 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3254 /* return pointer to global ordering of local dofs */ 3255 *global_numbering_subset = temp_global_dofs; 3256 PetscFunctionReturn(0); 3257 } 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3261 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3262 { 3263 PetscInt i,j; 3264 PetscScalar *alphas; 3265 PetscErrorCode ierr; 3266 3267 PetscFunctionBegin; 3268 /* this implements stabilized Gram-Schmidt */ 3269 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3270 for (i=0;i<n;i++) { 3271 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3272 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3273 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3274 } 3275 ierr = PetscFree(alphas);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatISGetSubassemblingPattern" 3281 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3282 { 3283 Mat subdomain_adj; 3284 IS new_ranks,ranks_send_to; 3285 MatPartitioning partitioner; 3286 Mat_IS *matis; 3287 PetscInt n_neighs,*neighs,*n_shared,**shared; 3288 PetscInt prank; 3289 PetscMPIInt size,rank,color; 3290 PetscInt *xadj,*adjncy,*oldranks; 3291 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3292 PetscInt i,local_size,threshold=0; 3293 PetscErrorCode ierr; 3294 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3295 PetscSubcomm subcomm; 3296 3297 PetscFunctionBegin; 3298 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3299 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3300 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3301 3302 /* Get info on mapping */ 3303 matis = (Mat_IS*)(mat->data); 3304 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3305 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3306 3307 /* build local CSR graph of subdomains' connectivity */ 3308 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3309 xadj[0] = 0; 3310 xadj[1] = PetscMax(n_neighs-1,0); 3311 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3312 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3313 3314 if (threshold) { 3315 PetscInt xadj_count = 0; 3316 for (i=1;i<n_neighs;i++) { 3317 if (n_shared[i] > threshold) { 3318 adjncy[xadj_count] = neighs[i]; 3319 adjncy_wgt[xadj_count] = n_shared[i]; 3320 xadj_count++; 3321 } 3322 } 3323 xadj[1] = xadj_count; 3324 } else { 3325 if (xadj[1]) { 3326 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3327 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3328 } 3329 } 3330 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3331 if (use_square) { 3332 for (i=0;i<xadj[1];i++) { 3333 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3334 } 3335 } 3336 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3337 3338 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3339 3340 /* 3341 Restrict work on active processes only. 3342 */ 3343 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3344 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3345 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3346 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3347 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3348 if (color) { 3349 ierr = PetscFree(xadj);CHKERRQ(ierr); 3350 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3351 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3352 } else { 3353 PetscInt coarsening_ratio; 3354 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3355 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3356 prank = rank; 3357 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3358 /* 3359 for (i=0;i<size;i++) { 3360 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3361 } 3362 */ 3363 for (i=0;i<xadj[1];i++) { 3364 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3365 } 3366 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3367 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3368 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3369 3370 /* Partition */ 3371 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3372 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3373 if (use_vwgt) { 3374 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3375 v_wgt[0] = local_size; 3376 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3377 } 3378 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3379 coarsening_ratio = size/n_subdomains; 3380 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3381 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3382 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3383 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3384 3385 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3386 if (contiguous) { 3387 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3388 } else { 3389 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3390 } 3391 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3392 /* clean up */ 3393 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3394 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3395 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3396 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3397 } 3398 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3399 3400 /* assemble parallel IS for sends */ 3401 i = 1; 3402 if (color) i=0; 3403 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3404 3405 /* get back IS */ 3406 *is_sends = ranks_send_to; 3407 PetscFunctionReturn(0); 3408 } 3409 3410 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3411 3412 #undef __FUNCT__ 3413 #define __FUNCT__ "MatISSubassemble" 3414 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3415 { 3416 Mat local_mat; 3417 Mat_IS *matis; 3418 IS is_sends_internal; 3419 PetscInt rows,cols,new_local_rows; 3420 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3421 PetscBool ismatis,isdense,newisdense,destroy_mat; 3422 ISLocalToGlobalMapping l2gmap; 3423 PetscInt* l2gmap_indices; 3424 const PetscInt* is_indices; 3425 MatType new_local_type; 3426 /* buffers */ 3427 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3428 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3429 PetscInt *recv_buffer_idxs_local; 3430 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3431 /* MPI */ 3432 MPI_Comm comm,comm_n; 3433 PetscSubcomm subcomm; 3434 PetscMPIInt n_sends,n_recvs,commsize; 3435 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3436 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3437 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3438 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3439 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3440 PetscErrorCode ierr; 3441 3442 PetscFunctionBegin; 3443 /* TODO: add missing checks */ 3444 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3445 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3446 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3447 PetscValidLogicalCollectiveInt(mat,nis,7); 3448 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3449 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3450 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3451 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3452 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3453 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3454 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3455 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3456 PetscInt mrows,mcols,mnrows,mncols; 3457 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3458 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3459 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3460 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3461 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3462 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3463 } 3464 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3465 PetscValidLogicalCollectiveInt(mat,bs,0); 3466 /* prepare IS for sending if not provided */ 3467 if (!is_sends) { 3468 PetscBool pcontig = PETSC_TRUE; 3469 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3470 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3471 } else { 3472 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3473 is_sends_internal = is_sends; 3474 } 3475 3476 /* get pointer of MATIS data */ 3477 matis = (Mat_IS*)mat->data; 3478 3479 /* get comm */ 3480 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3481 3482 /* compute number of sends */ 3483 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3484 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3485 3486 /* compute number of receives */ 3487 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3488 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3489 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3490 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3491 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3492 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3493 ierr = PetscFree(iflags);CHKERRQ(ierr); 3494 3495 /* restrict comm if requested */ 3496 subcomm = 0; 3497 destroy_mat = PETSC_FALSE; 3498 if (restrict_comm) { 3499 PetscMPIInt color,subcommsize; 3500 3501 color = 0; 3502 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3503 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3504 subcommsize = commsize - subcommsize; 3505 /* check if reuse has been requested */ 3506 if (reuse == MAT_REUSE_MATRIX) { 3507 if (*mat_n) { 3508 PetscMPIInt subcommsize2; 3509 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3510 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3511 comm_n = PetscObjectComm((PetscObject)*mat_n); 3512 } else { 3513 comm_n = PETSC_COMM_SELF; 3514 } 3515 } else { /* MAT_INITIAL_MATRIX */ 3516 PetscMPIInt rank; 3517 3518 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3519 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3520 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3521 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3522 comm_n = PetscSubcommChild(subcomm); 3523 } 3524 /* flag to destroy *mat_n if not significative */ 3525 if (color) destroy_mat = PETSC_TRUE; 3526 } else { 3527 comm_n = comm; 3528 } 3529 3530 /* prepare send/receive buffers */ 3531 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3532 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3533 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3534 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3535 if (nis) { 3536 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3537 } 3538 3539 /* Get data from local matrices */ 3540 if (!isdense) { 3541 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3542 /* TODO: See below some guidelines on how to prepare the local buffers */ 3543 /* 3544 send_buffer_vals should contain the raw values of the local matrix 3545 send_buffer_idxs should contain: 3546 - MatType_PRIVATE type 3547 - PetscInt size_of_l2gmap 3548 - PetscInt global_row_indices[size_of_l2gmap] 3549 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3550 */ 3551 } else { 3552 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3553 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3554 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3555 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3556 send_buffer_idxs[1] = i; 3557 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3558 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3559 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3560 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3561 for (i=0;i<n_sends;i++) { 3562 ilengths_vals[is_indices[i]] = len*len; 3563 ilengths_idxs[is_indices[i]] = len+2; 3564 } 3565 } 3566 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3567 /* additional is (if any) */ 3568 if (nis) { 3569 PetscMPIInt psum; 3570 PetscInt j; 3571 for (j=0,psum=0;j<nis;j++) { 3572 PetscInt plen; 3573 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3574 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3575 psum += len+1; /* indices + lenght */ 3576 } 3577 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3578 for (j=0,psum=0;j<nis;j++) { 3579 PetscInt plen; 3580 const PetscInt *is_array_idxs; 3581 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3582 send_buffer_idxs_is[psum] = plen; 3583 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3584 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3585 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3586 psum += plen+1; /* indices + lenght */ 3587 } 3588 for (i=0;i<n_sends;i++) { 3589 ilengths_idxs_is[is_indices[i]] = psum; 3590 } 3591 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3592 } 3593 3594 buf_size_idxs = 0; 3595 buf_size_vals = 0; 3596 buf_size_idxs_is = 0; 3597 for (i=0;i<n_recvs;i++) { 3598 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3599 buf_size_vals += (PetscInt)olengths_vals[i]; 3600 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3601 } 3602 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3603 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3604 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3605 3606 /* get new tags for clean communications */ 3607 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3608 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3609 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3610 3611 /* allocate for requests */ 3612 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3613 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3614 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3615 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3616 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3617 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3618 3619 /* communications */ 3620 ptr_idxs = recv_buffer_idxs; 3621 ptr_vals = recv_buffer_vals; 3622 ptr_idxs_is = recv_buffer_idxs_is; 3623 for (i=0;i<n_recvs;i++) { 3624 source_dest = onodes[i]; 3625 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3626 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3627 ptr_idxs += olengths_idxs[i]; 3628 ptr_vals += olengths_vals[i]; 3629 if (nis) { 3630 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3631 ptr_idxs_is += olengths_idxs_is[i]; 3632 } 3633 } 3634 for (i=0;i<n_sends;i++) { 3635 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3636 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3637 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3638 if (nis) { 3639 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3640 } 3641 } 3642 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3643 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3644 3645 /* assemble new l2g map */ 3646 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3647 ptr_idxs = recv_buffer_idxs; 3648 new_local_rows = 0; 3649 for (i=0;i<n_recvs;i++) { 3650 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3651 ptr_idxs += olengths_idxs[i]; 3652 } 3653 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3654 ptr_idxs = recv_buffer_idxs; 3655 new_local_rows = 0; 3656 for (i=0;i<n_recvs;i++) { 3657 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3658 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3659 ptr_idxs += olengths_idxs[i]; 3660 } 3661 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3662 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3663 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3664 3665 /* infer new local matrix type from received local matrices type */ 3666 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3667 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3668 if (n_recvs) { 3669 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3670 ptr_idxs = recv_buffer_idxs; 3671 for (i=0;i<n_recvs;i++) { 3672 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3673 new_local_type_private = MATAIJ_PRIVATE; 3674 break; 3675 } 3676 ptr_idxs += olengths_idxs[i]; 3677 } 3678 switch (new_local_type_private) { 3679 case MATDENSE_PRIVATE: 3680 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3681 new_local_type = MATSEQAIJ; 3682 bs = 1; 3683 } else { /* if I receive only 1 dense matrix */ 3684 new_local_type = MATSEQDENSE; 3685 bs = 1; 3686 } 3687 break; 3688 case MATAIJ_PRIVATE: 3689 new_local_type = MATSEQAIJ; 3690 bs = 1; 3691 break; 3692 case MATBAIJ_PRIVATE: 3693 new_local_type = MATSEQBAIJ; 3694 break; 3695 case MATSBAIJ_PRIVATE: 3696 new_local_type = MATSEQSBAIJ; 3697 break; 3698 default: 3699 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3700 break; 3701 } 3702 } else { /* by default, new_local_type is seqdense */ 3703 new_local_type = MATSEQDENSE; 3704 bs = 1; 3705 } 3706 3707 /* create MATIS object if needed */ 3708 if (reuse == MAT_INITIAL_MATRIX) { 3709 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3710 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3711 } else { 3712 /* it also destroys the local matrices */ 3713 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3714 } 3715 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3716 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3717 3718 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3719 3720 /* Global to local map of received indices */ 3721 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3722 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3723 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3724 3725 /* restore attributes -> type of incoming data and its size */ 3726 buf_size_idxs = 0; 3727 for (i=0;i<n_recvs;i++) { 3728 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3729 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3730 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3731 } 3732 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3733 3734 /* set preallocation */ 3735 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3736 if (!newisdense) { 3737 PetscInt *new_local_nnz=0; 3738 3739 ptr_vals = recv_buffer_vals; 3740 ptr_idxs = recv_buffer_idxs_local; 3741 if (n_recvs) { 3742 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3743 } 3744 for (i=0;i<n_recvs;i++) { 3745 PetscInt j; 3746 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3747 for (j=0;j<*(ptr_idxs+1);j++) { 3748 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3749 } 3750 } else { 3751 /* TODO */ 3752 } 3753 ptr_idxs += olengths_idxs[i]; 3754 } 3755 if (new_local_nnz) { 3756 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3757 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3758 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3759 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3760 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3761 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3762 } else { 3763 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3764 } 3765 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3766 } else { 3767 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3768 } 3769 3770 /* set values */ 3771 ptr_vals = recv_buffer_vals; 3772 ptr_idxs = recv_buffer_idxs_local; 3773 for (i=0;i<n_recvs;i++) { 3774 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3775 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3776 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3777 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3778 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3779 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3780 } else { 3781 /* TODO */ 3782 } 3783 ptr_idxs += olengths_idxs[i]; 3784 ptr_vals += olengths_vals[i]; 3785 } 3786 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3787 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3788 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3789 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3790 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3791 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3792 3793 #if 0 3794 if (!restrict_comm) { /* check */ 3795 Vec lvec,rvec; 3796 PetscReal infty_error; 3797 3798 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3799 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3800 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3801 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3802 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3803 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3804 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3805 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3806 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3807 } 3808 #endif 3809 3810 /* assemble new additional is (if any) */ 3811 if (nis) { 3812 PetscInt **temp_idxs,*count_is,j,psum; 3813 3814 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3815 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3816 ptr_idxs = recv_buffer_idxs_is; 3817 psum = 0; 3818 for (i=0;i<n_recvs;i++) { 3819 for (j=0;j<nis;j++) { 3820 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3821 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3822 psum += plen; 3823 ptr_idxs += plen+1; /* shift pointer to received data */ 3824 } 3825 } 3826 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3827 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3828 for (i=1;i<nis;i++) { 3829 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3830 } 3831 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3832 ptr_idxs = recv_buffer_idxs_is; 3833 for (i=0;i<n_recvs;i++) { 3834 for (j=0;j<nis;j++) { 3835 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3836 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3837 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3838 ptr_idxs += plen+1; /* shift pointer to received data */ 3839 } 3840 } 3841 for (i=0;i<nis;i++) { 3842 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3843 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3844 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3845 } 3846 ierr = PetscFree(count_is);CHKERRQ(ierr); 3847 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3848 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3849 } 3850 /* free workspace */ 3851 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3852 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3853 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3854 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3855 if (isdense) { 3856 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3857 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3858 } else { 3859 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3860 } 3861 if (nis) { 3862 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3863 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3864 } 3865 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3866 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3867 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3868 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3869 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3870 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3871 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3872 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3873 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3874 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3875 ierr = PetscFree(onodes);CHKERRQ(ierr); 3876 if (nis) { 3877 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3878 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3879 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3880 } 3881 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3882 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3883 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3884 for (i=0;i<nis;i++) { 3885 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3886 } 3887 } 3888 PetscFunctionReturn(0); 3889 } 3890 3891 /* temporary hack into ksp private data structure */ 3892 #include <petsc-private/kspimpl.h> 3893 3894 #undef __FUNCT__ 3895 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3896 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3897 { 3898 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3899 PC_IS *pcis = (PC_IS*)pc->data; 3900 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3901 MatNullSpace CoarseNullSpace=NULL; 3902 ISLocalToGlobalMapping coarse_islg; 3903 IS coarse_is,*isarray; 3904 PetscInt i,im_active=-1,active_procs=-1; 3905 PetscInt nis,nisdofs,nisneu; 3906 PC pc_temp; 3907 PCType coarse_pc_type; 3908 KSPType coarse_ksp_type; 3909 PetscBool multilevel_requested,multilevel_allowed; 3910 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3911 Mat t_coarse_mat_is; 3912 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3913 PetscMPIInt all_procs; 3914 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3915 PetscBool compute_vecs = PETSC_FALSE; 3916 PetscScalar *array; 3917 PetscErrorCode ierr; 3918 3919 PetscFunctionBegin; 3920 /* Assign global numbering to coarse dofs */ 3921 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3922 PetscInt ocoarse_size; 3923 compute_vecs = PETSC_TRUE; 3924 ocoarse_size = pcbddc->coarse_size; 3925 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3926 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3927 /* see if we can avoid some work */ 3928 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3929 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3930 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3931 coarse_reuse = PETSC_FALSE; 3932 } else { /* we can safely reuse already computed coarse matrix */ 3933 coarse_reuse = PETSC_TRUE; 3934 } 3935 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3936 coarse_reuse = PETSC_FALSE; 3937 } 3938 /* reset any subassembling information */ 3939 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3940 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3941 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3942 coarse_reuse = PETSC_TRUE; 3943 } 3944 3945 /* count "active" (i.e. with positive local size) and "void" processes */ 3946 im_active = !!(pcis->n); 3947 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3948 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3949 void_procs = all_procs-active_procs; 3950 csin_type_simple = PETSC_TRUE; 3951 redist = PETSC_FALSE; 3952 if (pcbddc->current_level && void_procs) { 3953 csin_ml = PETSC_TRUE; 3954 ncoarse_ml = void_procs; 3955 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3956 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3957 csin_ds = PETSC_TRUE; 3958 ncoarse_ds = pcbddc->redistribute_coarse; 3959 redist = PETSC_TRUE; 3960 } else { 3961 csin_ds = PETSC_TRUE; 3962 ncoarse_ds = active_procs; 3963 redist = PETSC_TRUE; 3964 } 3965 } else { 3966 csin_ml = PETSC_FALSE; 3967 ncoarse_ml = all_procs; 3968 if (void_procs) { 3969 csin_ds = PETSC_TRUE; 3970 ncoarse_ds = void_procs; 3971 csin_type_simple = PETSC_FALSE; 3972 } else { 3973 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3974 csin_ds = PETSC_TRUE; 3975 ncoarse_ds = pcbddc->redistribute_coarse; 3976 redist = PETSC_TRUE; 3977 } else { 3978 csin_ds = PETSC_FALSE; 3979 ncoarse_ds = all_procs; 3980 } 3981 } 3982 } 3983 3984 /* 3985 test if we can go multilevel: three conditions must be satisfied: 3986 - we have not exceeded the number of levels requested 3987 - we can actually subassemble the active processes 3988 - we can find a suitable number of MPI processes where we can place the subassembled problem 3989 */ 3990 multilevel_allowed = PETSC_FALSE; 3991 multilevel_requested = PETSC_FALSE; 3992 if (pcbddc->current_level < pcbddc->max_levels) { 3993 multilevel_requested = PETSC_TRUE; 3994 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3995 multilevel_allowed = PETSC_FALSE; 3996 } else { 3997 multilevel_allowed = PETSC_TRUE; 3998 } 3999 } 4000 /* determine number of process partecipating to coarse solver */ 4001 if (multilevel_allowed) { 4002 ncoarse = ncoarse_ml; 4003 csin = csin_ml; 4004 redist = PETSC_FALSE; 4005 } else { 4006 ncoarse = ncoarse_ds; 4007 csin = csin_ds; 4008 } 4009 4010 /* creates temporary l2gmap and IS for coarse indexes */ 4011 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4012 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4013 4014 /* creates temporary MATIS object for coarse matrix */ 4015 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4016 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4017 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4018 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4019 #if 0 4020 { 4021 PetscViewer viewer; 4022 char filename[256]; 4023 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 4024 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4025 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4026 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4027 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4028 } 4029 #endif 4030 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 4031 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4032 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4033 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4034 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4035 4036 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4037 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4038 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4039 const PetscInt *idxs; 4040 ISLocalToGlobalMapping tmap; 4041 4042 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4043 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4044 /* allocate space for temporary storage */ 4045 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4046 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4047 /* allocate for IS array */ 4048 nisdofs = pcbddc->n_ISForDofsLocal; 4049 nisneu = !!pcbddc->NeumannBoundariesLocal; 4050 nis = nisdofs + nisneu; 4051 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4052 /* dofs splitting */ 4053 for (i=0;i<nisdofs;i++) { 4054 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4055 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4056 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4057 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4058 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4059 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4060 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4061 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4062 } 4063 /* neumann boundaries */ 4064 if (pcbddc->NeumannBoundariesLocal) { 4065 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4066 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4067 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4068 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4069 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4070 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4071 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4072 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4073 } 4074 /* free memory */ 4075 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4076 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4077 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4078 } else { 4079 nis = 0; 4080 nisdofs = 0; 4081 nisneu = 0; 4082 isarray = NULL; 4083 } 4084 /* destroy no longer needed map */ 4085 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4086 4087 /* restrict on coarse candidates (if needed) */ 4088 coarse_mat_is = NULL; 4089 if (csin) { 4090 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4091 if (redist) { 4092 PetscMPIInt rank; 4093 PetscInt spc,n_spc_p1,dest[1],destsize; 4094 4095 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4096 spc = active_procs/ncoarse; 4097 n_spc_p1 = active_procs%ncoarse; 4098 if (im_active) { 4099 destsize = 1; 4100 if (rank > n_spc_p1*(spc+1)-1) { 4101 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4102 } else { 4103 dest[0] = rank/(spc+1); 4104 } 4105 } else { 4106 destsize = 0; 4107 } 4108 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4109 } else if (csin_type_simple) { 4110 PetscMPIInt rank; 4111 PetscInt issize,isidx; 4112 4113 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4114 if (im_active) { 4115 issize = 1; 4116 isidx = (PetscInt)rank; 4117 } else { 4118 issize = 0; 4119 isidx = -1; 4120 } 4121 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4122 } else { /* get a suitable subassembling pattern from MATIS code */ 4123 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4124 } 4125 4126 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4127 if (!redist || ncoarse <= void_procs) { 4128 PetscInt ncoarse_cand,tissize,*nisindices; 4129 PetscInt *coarse_candidates; 4130 const PetscInt* tisindices; 4131 4132 /* get coarse candidates' ranks in pc communicator */ 4133 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4134 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4135 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4136 if (!coarse_candidates[i]) { 4137 coarse_candidates[ncoarse_cand++]=i; 4138 } 4139 } 4140 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4141 4142 4143 if (pcbddc->dbg_flag) { 4144 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4146 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4147 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4148 for (i=0;i<ncoarse_cand;i++) { 4149 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4150 } 4151 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4153 } 4154 /* shift the pattern on coarse candidates */ 4155 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4156 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4157 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4158 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4159 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4160 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4161 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4162 } 4163 if (pcbddc->dbg_flag) { 4164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4166 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4167 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4168 } 4169 } 4170 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4171 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4172 } else { 4173 if (pcbddc->dbg_flag) { 4174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4175 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4176 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4177 } 4178 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4179 coarse_mat_is = t_coarse_mat_is; 4180 } 4181 4182 /* create local to global scatters for coarse problem */ 4183 if (compute_vecs) { 4184 PetscInt lrows; 4185 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4186 if (coarse_mat_is) { 4187 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4188 } else { 4189 lrows = 0; 4190 } 4191 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4192 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4193 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4194 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4195 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4196 } 4197 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4198 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4199 4200 /* set defaults for coarse KSP and PC */ 4201 if (multilevel_allowed) { 4202 coarse_ksp_type = KSPRICHARDSON; 4203 coarse_pc_type = PCBDDC; 4204 } else { 4205 coarse_ksp_type = KSPPREONLY; 4206 coarse_pc_type = PCREDUNDANT; 4207 } 4208 4209 /* print some info if requested */ 4210 if (pcbddc->dbg_flag) { 4211 if (!multilevel_allowed) { 4212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4213 if (multilevel_requested) { 4214 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4215 } else if (pcbddc->max_levels) { 4216 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4217 } 4218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4219 } 4220 } 4221 4222 /* create the coarse KSP object only once with defaults */ 4223 if (coarse_mat_is) { 4224 MatReuse coarse_mat_reuse; 4225 PetscViewer dbg_viewer = NULL; 4226 if (pcbddc->dbg_flag) { 4227 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4228 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4229 } 4230 if (!pcbddc->coarse_ksp) { 4231 char prefix[256],str_level[16]; 4232 size_t len; 4233 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4234 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4235 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4236 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4237 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4238 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4239 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4240 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4241 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4242 /* prefix */ 4243 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4244 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4245 if (!pcbddc->current_level) { 4246 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4247 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4248 } else { 4249 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4250 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4251 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4252 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4253 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4254 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4255 } 4256 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4257 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4258 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4259 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4260 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4261 /* allow user customization */ 4262 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4263 } 4264 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4265 if (nisdofs) { 4266 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4267 for (i=0;i<nisdofs;i++) { 4268 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4269 } 4270 } 4271 if (nisneu) { 4272 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4273 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4274 } 4275 4276 /* get some info after set from options */ 4277 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4278 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4279 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4280 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4281 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4282 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4283 isbddc = PETSC_FALSE; 4284 } 4285 if (isredundant) { 4286 KSP inner_ksp; 4287 PC inner_pc; 4288 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4289 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4290 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4291 } 4292 4293 /* assemble coarse matrix */ 4294 if (coarse_reuse) { 4295 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4296 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4297 coarse_mat_reuse = MAT_REUSE_MATRIX; 4298 } else { 4299 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4300 } 4301 if (isbddc || isnn) { 4302 if (pcbddc->coarsening_ratio > 1) { 4303 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4304 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4305 if (pcbddc->dbg_flag) { 4306 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4307 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4308 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4309 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4310 } 4311 } 4312 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4313 } else { 4314 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4315 coarse_mat = coarse_mat_is; 4316 } 4317 } else { 4318 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4319 } 4320 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4321 4322 /* propagate symmetry info to coarse matrix */ 4323 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4324 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4325 4326 /* set operators */ 4327 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4328 if (pcbddc->dbg_flag) { 4329 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4330 } 4331 } else { /* processes non partecipating to coarse solver (if any) */ 4332 coarse_mat = 0; 4333 } 4334 ierr = PetscFree(isarray);CHKERRQ(ierr); 4335 #if 0 4336 { 4337 PetscViewer viewer; 4338 char filename[256]; 4339 sprintf(filename,"coarse_mat.m"); 4340 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4341 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4342 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4343 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4344 } 4345 #endif 4346 4347 /* Compute coarse null space (special handling by BDDC only) */ 4348 if (pcbddc->NullSpace) { 4349 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4350 } 4351 4352 if (pcbddc->coarse_ksp) { 4353 Vec crhs,csol; 4354 PetscBool ispreonly; 4355 if (CoarseNullSpace) { 4356 if (isbddc) { 4357 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4358 } else { 4359 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4360 } 4361 } 4362 /* setup coarse ksp */ 4363 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4364 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4365 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4366 /* hack */ 4367 if (!csol) { 4368 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4369 } 4370 if (!crhs) { 4371 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4372 } 4373 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4374 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4375 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4376 KSP check_ksp; 4377 KSPType check_ksp_type; 4378 PC check_pc; 4379 Vec check_vec,coarse_vec; 4380 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4381 PetscInt its; 4382 PetscBool compute_eigs; 4383 PetscReal *eigs_r,*eigs_c; 4384 PetscInt neigs; 4385 const char *prefix; 4386 4387 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4388 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4389 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4390 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4391 if (ispreonly) { 4392 check_ksp_type = KSPPREONLY; 4393 compute_eigs = PETSC_FALSE; 4394 } else { 4395 check_ksp_type = KSPGMRES; 4396 compute_eigs = PETSC_TRUE; 4397 } 4398 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4399 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4400 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4401 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4402 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4403 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4404 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4405 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4406 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4407 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4408 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4409 /* create random vec */ 4410 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4411 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4412 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4413 if (CoarseNullSpace) { 4414 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4415 } 4416 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4417 /* solve coarse problem */ 4418 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4419 if (CoarseNullSpace) { 4420 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4421 } 4422 /* set eigenvalue estimation if preonly has not been requested */ 4423 if (compute_eigs) { 4424 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4425 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4426 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4427 lambda_max = eigs_r[neigs-1]; 4428 lambda_min = eigs_r[0]; 4429 if (pcbddc->use_coarse_estimates) { 4430 if (lambda_max>lambda_min) { 4431 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4432 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4433 } 4434 } 4435 } 4436 4437 /* check coarse problem residual error */ 4438 if (pcbddc->dbg_flag) { 4439 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4440 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4441 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4442 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4443 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4444 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4445 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4446 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4447 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4448 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4449 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4450 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4451 if (compute_eigs) { 4452 PetscReal lambda_max_s,lambda_min_s; 4453 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4454 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4455 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4456 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4457 for (i=0;i<neigs;i++) { 4458 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4459 } 4460 } 4461 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4462 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4463 } 4464 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4465 if (compute_eigs) { 4466 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4467 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4468 } 4469 } 4470 } 4471 /* print additional info */ 4472 if (pcbddc->dbg_flag) { 4473 /* waits until all processes reaches this point */ 4474 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4476 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4477 } 4478 4479 /* free memory */ 4480 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4481 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 #undef __FUNCT__ 4486 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4487 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4488 { 4489 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4490 PC_IS* pcis = (PC_IS*)pc->data; 4491 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4492 PetscInt i,coarse_size=0; 4493 PetscInt *local_primal_indices=NULL; 4494 PetscErrorCode ierr; 4495 4496 PetscFunctionBegin; 4497 /* Compute global number of coarse dofs */ 4498 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4499 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4500 } 4501 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4502 4503 /* check numbering */ 4504 if (pcbddc->dbg_flag) { 4505 PetscScalar coarsesum,*array; 4506 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4507 4508 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4509 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4510 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4511 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4512 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4513 for (i=0;i<pcbddc->local_primal_size;i++) { 4514 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4515 } 4516 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4517 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4518 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4519 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4520 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4521 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4522 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4523 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4524 for (i=0;i<pcis->n;i++) { 4525 if (array[i] == 1.0) { 4526 set_error = PETSC_TRUE; 4527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4528 } 4529 } 4530 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4531 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4532 for (i=0;i<pcis->n;i++) { 4533 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4534 } 4535 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4536 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4537 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4538 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4539 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4540 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4541 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4542 PetscInt *gidxs; 4543 4544 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4545 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4548 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4549 for (i=0;i<pcbddc->local_primal_size;i++) { 4550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]); 4551 } 4552 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4553 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4554 } 4555 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4556 if (set_error_reduced) { 4557 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4558 } 4559 } 4560 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4561 /* get back data */ 4562 *coarse_size_n = coarse_size; 4563 *local_primal_indices_n = local_primal_indices; 4564 PetscFunctionReturn(0); 4565 } 4566 4567 #undef __FUNCT__ 4568 #define __FUNCT__ "PCBDDCGlobalToLocal" 4569 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4570 { 4571 IS localis_t; 4572 PetscInt i,lsize,*idxs,n; 4573 PetscScalar *vals; 4574 PetscErrorCode ierr; 4575 4576 PetscFunctionBegin; 4577 /* get indices in local ordering exploiting local to global map */ 4578 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4579 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4580 for (i=0;i<lsize;i++) vals[i] = 1.0; 4581 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4582 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4583 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4584 if (idxs) { /* multilevel guard */ 4585 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4586 } 4587 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4588 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4589 ierr = PetscFree(vals);CHKERRQ(ierr); 4590 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4591 /* now compute set in local ordering */ 4592 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4593 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4594 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4595 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4596 for (i=0,lsize=0;i<n;i++) { 4597 if (PetscRealPart(vals[i]) > 0.5) { 4598 lsize++; 4599 } 4600 } 4601 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4602 for (i=0,lsize=0;i<n;i++) { 4603 if (PetscRealPart(vals[i]) > 0.5) { 4604 idxs[lsize++] = i; 4605 } 4606 } 4607 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4608 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4609 *localis = localis_t; 4610 PetscFunctionReturn(0); 4611 } 4612 4613 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4614 #undef __FUNCT__ 4615 #define __FUNCT__ "PCBDDCMatMult_Private" 4616 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4617 { 4618 PCBDDCChange_ctx change_ctx; 4619 PetscErrorCode ierr; 4620 4621 PetscFunctionBegin; 4622 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4623 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4624 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4625 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4626 PetscFunctionReturn(0); 4627 } 4628 4629 #undef __FUNCT__ 4630 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4631 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4632 { 4633 PCBDDCChange_ctx change_ctx; 4634 PetscErrorCode ierr; 4635 4636 PetscFunctionBegin; 4637 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4638 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4639 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4640 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4641 PetscFunctionReturn(0); 4642 } 4643 4644 #undef __FUNCT__ 4645 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4646 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4647 { 4648 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4649 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4650 PetscInt *used_xadj,*used_adjncy; 4651 PetscBool free_used_adj; 4652 PetscErrorCode ierr; 4653 4654 PetscFunctionBegin; 4655 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4656 free_used_adj = PETSC_FALSE; 4657 if (pcbddc->sub_schurs_layers == -1) { 4658 used_xadj = NULL; 4659 used_adjncy = NULL; 4660 } else { 4661 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4662 used_xadj = pcbddc->mat_graph->xadj; 4663 used_adjncy = pcbddc->mat_graph->adjncy; 4664 } else if (pcbddc->computed_rowadj) { 4665 used_xadj = pcbddc->mat_graph->xadj; 4666 used_adjncy = pcbddc->mat_graph->adjncy; 4667 } else { 4668 PetscBool flg_row=PETSC_FALSE; 4669 const PetscInt *xadj,*adjncy; 4670 PetscInt nvtxs; 4671 4672 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4673 if (flg_row) { 4674 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4675 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4676 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4677 free_used_adj = PETSC_TRUE; 4678 } else { 4679 pcbddc->sub_schurs_layers = -1; 4680 used_xadj = NULL; 4681 used_adjncy = NULL; 4682 } 4683 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4684 } 4685 } 4686 ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4687 4688 /* free adjacency */ 4689 if (free_used_adj) { 4690 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4691 } 4692 PetscFunctionReturn(0); 4693 } 4694 4695 #undef __FUNCT__ 4696 #define __FUNCT__ "PCBDDCInitSubSchurs" 4697 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4698 { 4699 PC_IS *pcis=(PC_IS*)pc->data; 4700 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4701 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4702 PCBDDCGraph graph; 4703 Mat S_j; 4704 PetscErrorCode ierr; 4705 4706 PetscFunctionBegin; 4707 /* attach interface graph for determining subsets */ 4708 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4709 IS verticesIS; 4710 4711 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4712 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4713 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr); 4714 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4715 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4716 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4717 /* 4718 if (pcbddc->dbg_flag) { 4719 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4720 } 4721 */ 4722 } else { 4723 graph = pcbddc->mat_graph; 4724 } 4725 4726 /* Create Schur complement matrix */ 4727 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4728 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4729 4730 /* sub_schurs init */ 4731 ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4732 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4733 /* free graph struct */ 4734 if (pcbddc->sub_schurs_rebuild) { 4735 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4736 } 4737 PetscFunctionReturn(0); 4738 } 4739