1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 if (pcbddc->local_auxmat2) { 434 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 435 ierr = PetscFree(array);CHKERRQ(ierr); 436 } 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 451 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 458 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 459 { 460 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 461 PC_IS *pcis = (PC_IS*)pc->data; 462 VecType impVecType; 463 PetscInt n_constraints,n_R,old_size; 464 PetscErrorCode ierr; 465 466 PetscFunctionBegin; 467 if (!pcbddc->ConstraintMatrix) { 468 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 469 } 470 /* get sizes */ 471 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 472 n_R = pcis->n-pcbddc->n_vertices; 473 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 474 /* local work vectors (try to avoid unneeded work)*/ 475 /* R nodes */ 476 old_size = -1; 477 if (pcbddc->vec1_R) { 478 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 479 } 480 if (n_R != old_size) { 481 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 482 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 483 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 484 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 485 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 486 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 487 } 488 /* local primal dofs */ 489 old_size = -1; 490 if (pcbddc->vec1_P) { 491 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 492 } 493 if (pcbddc->local_primal_size != old_size) { 494 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 495 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 496 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 497 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 498 } 499 /* local explicit constraints */ 500 old_size = -1; 501 if (pcbddc->vec1_C) { 502 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 503 } 504 if (n_constraints && n_constraints != old_size) { 505 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 506 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 507 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 508 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 509 } 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "PCBDDCSetUpCorrection" 515 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 516 { 517 PetscErrorCode ierr; 518 /* pointers to pcis and pcbddc */ 519 PC_IS* pcis = (PC_IS*)pc->data; 520 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 521 /* submatrices of local problem */ 522 Mat A_RV,A_VR,A_VV; 523 /* submatrices of local coarse problem */ 524 Mat S_VV,S_CV,S_VC,S_CC; 525 /* working matrices */ 526 Mat C_CR; 527 /* additional working stuff */ 528 PC pc_R; 529 Mat F; 530 PetscBool isLU,isCHOL,isILU; 531 532 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 533 PetscScalar *work; 534 PetscInt *idx_V_B; 535 PetscInt n,n_vertices,n_constraints; 536 PetscInt i,n_R,n_D,n_B; 537 PetscBool unsymmetric_check; 538 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 539 MatType impMatType; 540 /* some shortcuts to scalars */ 541 PetscScalar one=1.0,m_one=-1.0; 542 543 PetscFunctionBegin; 544 n_vertices = pcbddc->n_vertices; 545 n_constraints = pcbddc->local_primal_size-n_vertices; 546 /* Set Non-overlapping dimensions */ 547 n_B = pcis->n_B; 548 n_D = pcis->n - n_B; 549 n_R = pcis->n - n_vertices; 550 551 /* Set types for local objects needed by BDDC precondtioner */ 552 impMatType = MATSEQDENSE; 553 554 /* vertices in boundary numbering */ 555 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 556 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr); 557 if (i != n_vertices) { 558 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 559 } 560 561 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 562 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 563 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 564 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 565 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 566 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 567 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 568 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 569 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 570 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 571 572 unsymmetric_check = PETSC_FALSE; 573 /* allocate workspace */ 574 n = 0; 575 if (n_constraints) { 576 n += n_R*n_constraints; 577 } 578 if (n_vertices) { 579 n = PetscMax(2*n_R*n_vertices,n); 580 } 581 if (!pcbddc->issym) { 582 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 583 unsymmetric_check = PETSC_TRUE; 584 } 585 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 586 587 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 588 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 591 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 592 if (isLU || isILU || isCHOL) { 593 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 594 } else { 595 F = NULL; 596 } 597 598 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 599 if (n_constraints) { 600 Mat M1,M2,M3; 601 IS is_aux; 602 /* see if we can save some allocations */ 603 if (pcbddc->local_auxmat2) { 604 PetscInt on_R,on_constraints; 605 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 606 if (on_R != n_R || on_constraints != n_constraints) { 607 PetscScalar *marray; 608 609 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 610 ierr = PetscFree(marray);CHKERRQ(ierr); 611 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 613 } 614 } 615 /* auxiliary matrices */ 616 if (!pcbddc->local_auxmat2) { 617 PetscScalar *marray; 618 619 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 620 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 621 marray += n_R*n_constraints; 622 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 623 } 624 625 /* Extract constraints on R nodes: C_{CR} */ 626 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 627 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 628 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 629 630 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 631 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 632 for (i=0;i<n_constraints;i++) { 633 const PetscScalar *row_cmat_values; 634 const PetscInt *row_cmat_indices; 635 PetscInt size_of_constraint,j; 636 637 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 638 for (j=0;j<size_of_constraint;j++) { 639 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 640 } 641 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 642 } 643 if (F) { 644 Mat B; 645 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 647 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 648 ierr = MatDestroy(&B);CHKERRQ(ierr); 649 } else { 650 PetscScalar *xarray; 651 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 652 for (i=0;i<n_constraints;i++) { 653 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 654 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 655 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 656 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 657 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 658 } 659 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 660 } 661 662 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 663 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 664 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 665 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 666 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 667 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 668 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 669 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 670 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 671 ierr = MatDestroy(&M2);CHKERRQ(ierr); 672 ierr = MatDestroy(&M3);CHKERRQ(ierr); 673 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 674 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 675 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 676 ierr = MatDestroy(&M1);CHKERRQ(ierr); 677 } 678 /* Get submatrices from subdomain matrix */ 679 if (n_vertices) { 680 Mat newmat; 681 IS is_aux; 682 PetscInt ibs,mbs; 683 PetscBool issbaij; 684 685 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 686 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 687 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 688 if (ibs != mbs) { /* need to convert to SEQAIJ */ 689 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 690 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 691 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 692 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 693 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 694 } else { 695 /* this is safe */ 696 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 697 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 698 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 699 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 700 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 701 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 702 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 703 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 704 } else { 705 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 706 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 707 } 708 } 709 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 710 } 711 712 /* Matrix of coarse basis functions (local) */ 713 if (pcbddc->coarse_phi_B) { 714 PetscInt on_B,on_primal,on_D=n_D; 715 if (pcbddc->coarse_phi_D) { 716 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 717 } 718 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 719 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 720 PetscScalar *marray; 721 722 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 723 ierr = PetscFree(marray);CHKERRQ(ierr); 724 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 725 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 728 } 729 } 730 731 if (!pcbddc->coarse_phi_B) { 732 PetscScalar *marray; 733 734 n = n_B*pcbddc->local_primal_size; 735 if (pcbddc->switch_static || pcbddc->dbg_flag) { 736 n += n_D*pcbddc->local_primal_size; 737 } 738 if (!pcbddc->issym) { 739 n *= 2; 740 } 741 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 742 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 743 n = n_B*pcbddc->local_primal_size; 744 if (pcbddc->switch_static || pcbddc->dbg_flag) { 745 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 746 n += n_D*pcbddc->local_primal_size; 747 } 748 if (!pcbddc->issym) { 749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 750 if (pcbddc->switch_static || pcbddc->dbg_flag) { 751 n = n_B*pcbddc->local_primal_size; 752 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 753 } 754 } else { 755 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 756 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 757 if (pcbddc->switch_static || pcbddc->dbg_flag) { 758 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 759 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 760 } 761 } 762 } 763 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 764 /* vertices */ 765 if (n_vertices) { 766 767 if (n_R) { 768 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 769 PetscBLASInt B_N,B_one = 1; 770 PetscScalar *x,*y; 771 772 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 773 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 774 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 775 if (F) { 776 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 777 } else { 778 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 779 for (i=0;i<n_vertices;i++) { 780 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 781 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 782 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 783 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 784 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 785 } 786 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 787 } 788 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 789 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 790 if (n_constraints) { 791 Mat B; 792 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 793 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 794 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 795 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 796 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 797 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 798 ierr = MatDestroy(&B);CHKERRQ(ierr); 799 } 800 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 801 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 802 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 803 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 804 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 805 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 806 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 807 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 808 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 809 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 810 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 811 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 812 } else { 813 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 814 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 815 } 816 /* coarse basis functions */ 817 for (i=0;i<n_vertices;i++) { 818 PetscScalar *y; 819 820 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 821 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 822 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 823 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 y[n_B*i+idx_V_B[i]] = 1.0; 826 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 827 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 828 829 if (pcbddc->switch_static || pcbddc->dbg_flag) { 830 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 831 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 832 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 833 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 835 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 836 } 837 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 838 } 839 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 840 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 841 } 842 843 if (n_constraints) { 844 Mat B; 845 846 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 847 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 848 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 849 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 850 if (n_vertices) { 851 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 852 } 853 ierr = MatDestroy(&B);CHKERRQ(ierr); 854 /* coarse basis functions */ 855 for (i=0;i<n_constraints;i++) { 856 PetscScalar *y; 857 858 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 859 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 860 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 861 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 862 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 864 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 865 866 if (pcbddc->switch_static || pcbddc->dbg_flag) { 867 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 868 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 869 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 872 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 873 } 874 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 875 } 876 } 877 878 /* compute other basis functions for non-symmetric problems */ 879 if (!pcbddc->issym) { 880 Mat B,X; 881 882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 883 884 if (n_constraints) { 885 Mat S_CCT,B_C; 886 887 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 888 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 889 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 890 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 891 if (n_vertices) { 892 Mat B_V,S_VCT; 893 894 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 895 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 896 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 897 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 898 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 899 } 900 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 901 } 902 if (n_vertices && n_R) { 903 Mat A_VRT; 904 PetscBLASInt B_N,B_one = 1; 905 906 if (!n_constraints) { /* if there are no constraints, reset work */ 907 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 908 } 909 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 910 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 911 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 912 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 913 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 914 } 915 916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 917 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 918 for (i=0;i<pcbddc->local_primal_size;i++) { 919 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 920 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 921 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 922 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 923 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 924 } 925 } else { 926 for (i=0;i<pcbddc->local_primal_size;i++) { 927 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 928 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 929 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 930 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 931 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 932 } 933 } 934 ierr = MatDestroy(&B);CHKERRQ(ierr); 935 /* coarse basis functions */ 936 for (i=0;i<pcbddc->local_primal_size;i++) { 937 PetscScalar *y; 938 939 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 940 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 941 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 942 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 if (i<n_vertices) { 945 y[n_B*i+idx_V_B[i]] = 1.0; 946 } 947 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 948 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 949 950 if (pcbddc->switch_static || pcbddc->dbg_flag) { 951 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 952 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 953 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 956 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 957 } 958 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 959 } 960 ierr = MatDestroy(&X);CHKERRQ(ierr); 961 } 962 963 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 964 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 965 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 966 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 967 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 968 /* Checking coarse_sub_mat and coarse basis functios */ 969 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 970 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 971 if (pcbddc->dbg_flag) { 972 Mat coarse_sub_mat; 973 Mat AUXMAT,TM1,TM2,TM3,TM4; 974 Mat coarse_phi_D,coarse_phi_B; 975 Mat coarse_psi_D,coarse_psi_B; 976 Mat A_II,A_BB,A_IB,A_BI; 977 Mat C_B,CPHI; 978 IS is_dummy; 979 Vec mones; 980 MatType checkmattype=MATSEQAIJ; 981 PetscReal real_value; 982 983 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 984 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 985 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 986 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 987 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 988 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 989 if (unsymmetric_check) { 990 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 991 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 992 } 993 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 994 995 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 996 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 997 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 998 if (unsymmetric_check) { 999 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1000 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1001 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1002 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1003 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1004 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1005 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1006 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1007 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1008 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1010 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1011 } else { 1012 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1013 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1014 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1015 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1016 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1017 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1018 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1019 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1020 } 1021 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1022 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1023 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1024 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1025 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1026 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1027 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1029 1030 /* check constraints */ 1031 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1032 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1033 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1034 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1035 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1036 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1037 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1039 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1040 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1041 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1042 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1043 1044 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1045 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1046 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1047 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1048 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1049 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1050 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1051 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1052 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1053 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1054 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1055 if (unsymmetric_check) { 1056 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1057 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1058 } 1059 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1060 } 1061 1062 /* free memory */ 1063 ierr = PetscFree(work);CHKERRQ(ierr); 1064 if (n_vertices) { 1065 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1066 } 1067 if (n_constraints) { 1068 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1069 } 1070 /* get back data */ 1071 *coarse_submat_vals_n = coarse_submat_vals; 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1077 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1078 { 1079 Mat *work_mat; 1080 IS isrow_s,iscol_s; 1081 PetscBool rsorted,csorted; 1082 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1083 PetscErrorCode ierr; 1084 1085 PetscFunctionBegin; 1086 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1087 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1088 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1089 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1090 1091 if (!rsorted) { 1092 const PetscInt *idxs; 1093 PetscInt *idxs_sorted,i; 1094 1095 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1096 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1097 for (i=0;i<rsize;i++) { 1098 idxs_perm_r[i] = i; 1099 } 1100 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1101 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1102 for (i=0;i<rsize;i++) { 1103 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1104 } 1105 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1107 } else { 1108 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1109 isrow_s = isrow; 1110 } 1111 1112 if (!csorted) { 1113 if (isrow == iscol) { 1114 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1115 iscol_s = isrow_s; 1116 } else { 1117 const PetscInt *idxs; 1118 PetscInt *idxs_sorted,i; 1119 1120 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1121 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1122 for (i=0;i<csize;i++) { 1123 idxs_perm_c[i] = i; 1124 } 1125 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1126 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1127 for (i=0;i<csize;i++) { 1128 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1129 } 1130 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1131 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1132 } 1133 } else { 1134 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1135 iscol_s = iscol; 1136 } 1137 1138 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1139 1140 if (!rsorted || !csorted) { 1141 Mat new_mat; 1142 IS is_perm_r,is_perm_c; 1143 1144 if (!rsorted) { 1145 PetscInt *idxs_r,i; 1146 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1147 for (i=0;i<rsize;i++) { 1148 idxs_r[idxs_perm_r[i]] = i; 1149 } 1150 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1151 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1152 } else { 1153 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1154 } 1155 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1156 1157 if (!csorted) { 1158 if (isrow_s == iscol_s) { 1159 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1160 is_perm_c = is_perm_r; 1161 } else { 1162 PetscInt *idxs_c,i; 1163 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1164 for (i=0;i<csize;i++) { 1165 idxs_c[idxs_perm_c[i]] = i; 1166 } 1167 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1168 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1169 } 1170 } else { 1171 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1172 } 1173 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1174 1175 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1176 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1177 work_mat[0] = new_mat; 1178 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1179 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1180 } 1181 1182 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1183 *B = work_mat[0]; 1184 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1185 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1186 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1192 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1193 { 1194 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1195 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1196 Mat new_mat; 1197 IS is_local,is_global; 1198 PetscInt local_size; 1199 PetscBool isseqaij; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1204 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1205 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1206 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1207 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1208 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1209 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1210 1211 /* check */ 1212 if (pcbddc->dbg_flag) { 1213 Vec x,x_change; 1214 PetscReal error; 1215 1216 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1217 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1218 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1219 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1220 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1221 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1222 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1223 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1225 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1226 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1227 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1228 ierr = VecDestroy(&x);CHKERRQ(ierr); 1229 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1230 } 1231 1232 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1233 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1234 if (isseqaij) { 1235 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1236 } else { 1237 Mat work_mat; 1238 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1239 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1240 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1241 } 1242 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1243 #if !defined(PETSC_USE_COMPLEX) 1244 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1245 #endif 1246 /* 1247 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1248 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1249 */ 1250 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1256 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1257 { 1258 PC_IS* pcis = (PC_IS*)(pc->data); 1259 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1260 IS is_aux1,is_aux2; 1261 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1262 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1263 PetscInt vbs,bs; 1264 PetscBT bitmask; 1265 PetscErrorCode ierr; 1266 1267 PetscFunctionBegin; 1268 /* 1269 No need to setup local scatters if 1270 - primal space is unchanged 1271 AND 1272 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1273 AND 1274 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1275 */ 1276 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1277 PetscFunctionReturn(0); 1278 } 1279 /* destroy old objects */ 1280 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1281 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1282 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1283 /* Set Non-overlapping dimensions */ 1284 n_B = pcis->n_B; 1285 n_D = pcis->n - n_B; 1286 n_vertices = pcbddc->n_vertices; 1287 /* create auxiliary bitmask */ 1288 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1289 for (i=0;i<n_vertices;i++) { 1290 ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr); 1291 } 1292 1293 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1294 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1295 for (i=0, n_R=0; i<pcis->n; i++) { 1296 if (!PetscBTLookup(bitmask,i)) { 1297 idx_R_local[n_R] = i; 1298 n_R++; 1299 } 1300 } 1301 1302 /* Block code */ 1303 vbs = 1; 1304 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1305 if (bs>1 && !(n_vertices%bs)) { 1306 PetscBool is_blocked = PETSC_TRUE; 1307 PetscInt *vary; 1308 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1309 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1310 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1311 for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++; 1312 for (i=0; i<n_vertices/bs; i++) { 1313 if (vary[i]!=0 && vary[i]!=bs) { 1314 is_blocked = PETSC_FALSE; 1315 break; 1316 } 1317 } 1318 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1319 vbs = bs; 1320 for (i=0;i<n_R/vbs;i++) { 1321 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1322 } 1323 } 1324 ierr = PetscFree(vary);CHKERRQ(ierr); 1325 } 1326 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1327 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1328 1329 /* print some info if requested */ 1330 if (pcbddc->dbg_flag) { 1331 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1337 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1338 } 1339 1340 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1341 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1342 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1344 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1345 for (i=0; i<n_D; i++) { 1346 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1347 } 1348 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1349 for (i=0, j=0; i<n_R; i++) { 1350 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1351 aux_array1[j++] = i; 1352 } 1353 } 1354 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1355 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1356 for (i=0, j=0; i<n_B; i++) { 1357 if (!PetscBTLookup(bitmask,is_indices[i])) { 1358 aux_array2[j++] = i; 1359 } 1360 } 1361 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1362 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1363 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1364 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1365 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1366 1367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1368 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1369 for (i=0, j=0; i<n_R; i++) { 1370 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1371 aux_array1[j++] = i; 1372 } 1373 } 1374 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1375 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1376 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1377 } 1378 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1379 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 1383 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1386 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1387 { 1388 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1389 PC_IS *pcis = (PC_IS*)pc->data; 1390 PC pc_temp; 1391 Mat A_RR; 1392 MatReuse reuse; 1393 PetscScalar m_one = -1.0; 1394 PetscReal value; 1395 PetscInt n_D,n_R,ibs,mbs; 1396 PetscBool use_exact,use_exact_reduced,issbaij; 1397 PetscErrorCode ierr; 1398 /* prefixes stuff */ 1399 char dir_prefix[256],neu_prefix[256],str_level[16]; 1400 size_t len; 1401 1402 PetscFunctionBegin; 1403 1404 /* compute prefixes */ 1405 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1406 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1407 if (!pcbddc->current_level) { 1408 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1409 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1410 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1411 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1412 } else { 1413 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1414 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1415 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1416 len -= 15; /* remove "pc_bddc_coarse_" */ 1417 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1418 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1419 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1420 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1421 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1422 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1423 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1424 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1425 } 1426 1427 /* DIRICHLET PROBLEM */ 1428 if (dirichlet) { 1429 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1430 if (pcbddc->issym) { 1431 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1432 } 1433 /* Matrix for Dirichlet problem is pcis->A_II */ 1434 n_D = pcis->n - pcis->n_B; 1435 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1436 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1437 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1438 /* default */ 1439 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1440 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1441 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1442 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1443 if (issbaij) { 1444 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1445 } else { 1446 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1447 } 1448 /* Allow user's customization */ 1449 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1450 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1451 } 1452 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1453 if (sub_schurs->interior_solver) { 1454 ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr); 1455 } 1456 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1457 if (!n_D) { 1458 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1459 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1460 } 1461 /* Set Up KSP for Dirichlet problem of BDDC */ 1462 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1463 /* set ksp_D into pcis data */ 1464 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1465 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1466 pcis->ksp_D = pcbddc->ksp_D; 1467 } 1468 1469 /* NEUMANN PROBLEM */ 1470 A_RR = 0; 1471 if (neumann) { 1472 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1473 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1474 if (pcbddc->ksp_R) { /* already created ksp */ 1475 PetscInt nn_R; 1476 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1477 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1478 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1479 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1480 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1481 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1482 reuse = MAT_INITIAL_MATRIX; 1483 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1484 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1485 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1486 reuse = MAT_INITIAL_MATRIX; 1487 } else { /* safe to reuse the matrix */ 1488 reuse = MAT_REUSE_MATRIX; 1489 } 1490 } 1491 /* last check */ 1492 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1493 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1494 reuse = MAT_INITIAL_MATRIX; 1495 } 1496 } else { /* first time, so we need to create the matrix */ 1497 reuse = MAT_INITIAL_MATRIX; 1498 } 1499 /* extract A_RR */ 1500 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1501 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1502 if (ibs != mbs) { 1503 Mat newmat; 1504 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1505 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1506 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1507 } else { 1508 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1509 } 1510 if (pcbddc->issym) { 1511 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1512 } 1513 if (!pcbddc->ksp_R) { /* create object if not present */ 1514 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1515 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1516 /* default */ 1517 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1518 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1519 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1521 if (issbaij) { 1522 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1523 } else { 1524 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1525 } 1526 /* Allow user's customization */ 1527 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1528 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1529 } 1530 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1531 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1532 if (!n_R) { 1533 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1534 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1535 } 1536 /* Set Up KSP for Neumann problem of BDDC */ 1537 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1538 } 1539 1540 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1541 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1542 if (pcbddc->dbg_flag) { 1543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1544 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1545 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1546 } 1547 if (dirichlet) { /* Dirichlet */ 1548 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1549 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1550 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1551 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1552 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1553 /* need to be adapted? */ 1554 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1555 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1556 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1557 /* print info */ 1558 if (pcbddc->dbg_flag) { 1559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1560 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1561 } 1562 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1563 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1564 } 1565 } 1566 if (neumann) { /* Neumann */ 1567 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1568 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1569 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1570 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1571 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1572 /* need to be adapted? */ 1573 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1574 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1575 /* print info */ 1576 if (pcbddc->dbg_flag) { 1577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1579 } 1580 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1581 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1582 } 1583 } 1584 } 1585 /* free Neumann problem's matrix */ 1586 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1587 PetscFunctionReturn(0); 1588 } 1589 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1592 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1593 { 1594 PetscErrorCode ierr; 1595 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1596 1597 PetscFunctionBegin; 1598 if (applytranspose) { 1599 if (pcbddc->local_auxmat1) { 1600 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1601 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1602 } 1603 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1604 } else { 1605 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1606 if (pcbddc->local_auxmat1) { 1607 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1608 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1609 } 1610 } 1611 PetscFunctionReturn(0); 1612 } 1613 1614 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1617 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1618 { 1619 PetscErrorCode ierr; 1620 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1621 PC_IS* pcis = (PC_IS*) (pc->data); 1622 const PetscScalar zero = 0.0; 1623 1624 PetscFunctionBegin; 1625 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1626 if (applytranspose) { 1627 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1628 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1629 } else { 1630 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1631 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1632 } 1633 /* start communications from local primal nodes to rhs of coarse solver */ 1634 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1635 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1636 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 1638 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1639 /* TODO remove null space when doing multilevel */ 1640 if (pcbddc->coarse_ksp) { 1641 Vec rhs,sol; 1642 1643 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1644 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1645 if (applytranspose) { 1646 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1647 } else { 1648 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1649 } 1650 } 1651 1652 /* Local solution on R nodes */ 1653 if (pcis->n) { 1654 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1657 if (pcbddc->switch_static) { 1658 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1659 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1660 } 1661 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1662 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1663 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1664 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 if (pcbddc->switch_static) { 1666 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 } 1669 } 1670 1671 /* communications from coarse sol to local primal nodes */ 1672 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1673 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1674 1675 /* Sum contributions from two levels */ 1676 if (applytranspose) { 1677 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1678 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1679 } else { 1680 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1681 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1688 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1689 { 1690 PetscErrorCode ierr; 1691 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1692 PetscScalar *array; 1693 Vec from,to; 1694 1695 PetscFunctionBegin; 1696 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1697 from = pcbddc->coarse_vec; 1698 to = pcbddc->vec1_P; 1699 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1700 Vec tvec; 1701 1702 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1703 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1704 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1705 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1706 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1707 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1708 } 1709 } else { /* from local to global -> put data in coarse right hand side */ 1710 from = pcbddc->vec1_P; 1711 to = pcbddc->coarse_vec; 1712 } 1713 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1714 PetscFunctionReturn(0); 1715 } 1716 1717 #undef __FUNCT__ 1718 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1719 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1720 { 1721 PetscErrorCode ierr; 1722 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1723 PetscScalar *array; 1724 Vec from,to; 1725 1726 PetscFunctionBegin; 1727 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1728 from = pcbddc->coarse_vec; 1729 to = pcbddc->vec1_P; 1730 } else { /* from local to global -> put data in coarse right hand side */ 1731 from = pcbddc->vec1_P; 1732 to = pcbddc->coarse_vec; 1733 } 1734 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1735 if (smode == SCATTER_FORWARD) { 1736 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1737 Vec tvec; 1738 1739 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1740 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1741 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1742 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1743 } 1744 } else { 1745 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1746 ierr = VecResetArray(from);CHKERRQ(ierr); 1747 } 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 /* uncomment for testing purposes */ 1753 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1756 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1757 { 1758 PetscErrorCode ierr; 1759 PC_IS* pcis = (PC_IS*)(pc->data); 1760 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1761 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1762 /* one and zero */ 1763 PetscScalar one=1.0,zero=0.0; 1764 /* space to store constraints and their local indices */ 1765 PetscScalar *constraints_data; 1766 PetscInt *constraints_idxs,*constraints_idxs_B; 1767 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1768 PetscInt *constraints_n; 1769 /* iterators */ 1770 PetscInt i,j,k,total_counts,total_counts_cc; 1771 /* BLAS integers */ 1772 PetscBLASInt lwork,lierr; 1773 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1774 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1775 /* reuse */ 1776 PetscInt olocal_primal_size; 1777 PetscInt *oprimal_indices_local_idxs; 1778 /* change of basis */ 1779 PetscBool boolforchange,qr_needed; 1780 PetscBT change_basis,qr_needed_idx; 1781 /* auxiliary stuff */ 1782 PetscInt *nnz,*is_indices; 1783 PetscInt ncc; 1784 /* some quantities */ 1785 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1786 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1787 1788 PetscFunctionBegin; 1789 /* Destroy Mat objects computed previously */ 1790 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1791 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1792 /* save info on constraints from previous setup (if any) */ 1793 olocal_primal_size = pcbddc->local_primal_size; 1794 ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr); 1795 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 1796 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1797 1798 /* print some info */ 1799 if (pcbddc->dbg_flag) { 1800 IS vertices; 1801 PetscInt nv,nedges,nfaces; 1802 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1803 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1804 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1805 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1808 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1809 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1810 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1811 } 1812 1813 if (!pcbddc->adaptive_selection) { 1814 IS ISForVertices,*ISForFaces,*ISForEdges; 1815 MatNullSpace nearnullsp; 1816 const Vec *nearnullvecs; 1817 Vec *localnearnullsp; 1818 PetscScalar *array; 1819 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1820 PetscBool nnsp_has_cnst; 1821 /* LAPACK working arrays for SVD or POD */ 1822 PetscBool skip_lapack; 1823 PetscScalar *work; 1824 PetscReal *singular_vals; 1825 #if defined(PETSC_USE_COMPLEX) 1826 PetscReal *rwork; 1827 #endif 1828 #if defined(PETSC_MISSING_LAPACK_GESVD) 1829 PetscScalar *temp_basis,*correlation_mat; 1830 #else 1831 PetscBLASInt dummy_int=1; 1832 PetscScalar dummy_scalar=1.; 1833 #endif 1834 1835 /* Get index sets for faces, edges and vertices from graph */ 1836 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1837 /* free unneeded index sets */ 1838 if (!pcbddc->use_vertices) { 1839 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1840 } 1841 if (!pcbddc->use_edges) { 1842 for (i=0;i<n_ISForEdges;i++) { 1843 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1844 } 1845 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1846 n_ISForEdges = 0; 1847 } 1848 if (!pcbddc->use_faces) { 1849 for (i=0;i<n_ISForFaces;i++) { 1850 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1851 } 1852 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1853 n_ISForFaces = 0; 1854 } 1855 1856 #if defined(PETSC_USE_DEBUG) 1857 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1858 Also use_change_of_basis should be consistent among processors */ 1859 if (pcbddc->NullSpace) { 1860 PetscBool tbool[2],gbool[2]; 1861 1862 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 1863 pcbddc->use_change_of_basis = PETSC_TRUE; 1864 if (!ISForEdges) { 1865 pcbddc->use_change_on_faces = PETSC_TRUE; 1866 } 1867 } 1868 tbool[0] = pcbddc->use_change_of_basis; 1869 tbool[1] = pcbddc->use_change_on_faces; 1870 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1871 pcbddc->use_change_of_basis = gbool[0]; 1872 pcbddc->use_change_on_faces = gbool[1]; 1873 } 1874 #endif 1875 1876 /* check if near null space is attached to global mat */ 1877 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1878 if (nearnullsp) { 1879 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1880 /* remove any stored info */ 1881 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1882 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1883 /* store information for BDDC solver reuse */ 1884 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1885 pcbddc->onearnullspace = nearnullsp; 1886 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1887 for (i=0;i<nnsp_size;i++) { 1888 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1889 } 1890 } else { /* if near null space is not provided BDDC uses constants by default */ 1891 nnsp_size = 0; 1892 nnsp_has_cnst = PETSC_TRUE; 1893 } 1894 /* get max number of constraints on a single cc */ 1895 max_constraints = nnsp_size; 1896 if (nnsp_has_cnst) max_constraints++; 1897 1898 /* 1899 Evaluate maximum storage size needed by the procedure 1900 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 1901 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 1902 There can be multiple constraints per connected component 1903 */ 1904 n_vertices = 0; 1905 if (ISForVertices) { 1906 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1907 } 1908 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 1909 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 1910 1911 total_counts = n_ISForFaces+n_ISForEdges; 1912 total_counts *= max_constraints; 1913 total_counts += n_vertices; 1914 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1915 1916 total_counts = 0; 1917 max_size_of_constraint = 0; 1918 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1919 IS used_is; 1920 if (i<n_ISForEdges) { 1921 used_is = ISForEdges[i]; 1922 } else { 1923 used_is = ISForFaces[i-n_ISForEdges]; 1924 } 1925 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 1926 total_counts += j; 1927 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1928 } 1929 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 1930 1931 /* get local part of global near null space vectors */ 1932 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1933 for (k=0;k<nnsp_size;k++) { 1934 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1935 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1936 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1937 } 1938 1939 /* whether or not to skip lapack calls */ 1940 skip_lapack = PETSC_TRUE; 1941 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1942 1943 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1944 if (!skip_lapack) { 1945 PetscScalar temp_work; 1946 1947 #if defined(PETSC_MISSING_LAPACK_GESVD) 1948 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1949 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1950 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1951 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1952 #if defined(PETSC_USE_COMPLEX) 1953 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1954 #endif 1955 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1956 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1958 lwork = -1; 1959 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1960 #if !defined(PETSC_USE_COMPLEX) 1961 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1962 #else 1963 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1964 #endif 1965 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1966 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1967 #else /* on missing GESVD */ 1968 /* SVD */ 1969 PetscInt max_n,min_n; 1970 max_n = max_size_of_constraint; 1971 min_n = max_constraints; 1972 if (max_size_of_constraint < max_constraints) { 1973 min_n = max_size_of_constraint; 1974 max_n = max_constraints; 1975 } 1976 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1977 #if defined(PETSC_USE_COMPLEX) 1978 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1979 #endif 1980 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1981 lwork = -1; 1982 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1983 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1984 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1985 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1986 #if !defined(PETSC_USE_COMPLEX) 1987 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1988 #else 1989 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1990 #endif 1991 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1992 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1993 #endif /* on missing GESVD */ 1994 /* Allocate optimal workspace */ 1995 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1996 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 1997 } 1998 /* Now we can loop on constraining sets */ 1999 total_counts = 0; 2000 constraints_idxs_ptr[0] = 0; 2001 constraints_data_ptr[0] = 0; 2002 /* vertices */ 2003 if (n_vertices) { 2004 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2005 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2006 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2007 for (i=0;i<n_vertices;i++) { 2008 constraints_n[total_counts] = 1; 2009 constraints_data[total_counts] = 1.0; 2010 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2011 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2012 total_counts++; 2013 } 2014 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2015 PetscBool used_vertex; 2016 for (i=0;i<n_vertices;i++) { 2017 used_vertex = PETSC_FALSE; 2018 k = 0; 2019 while (!used_vertex && k<nnsp_size) { 2020 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2021 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2022 constraints_n[total_counts] = 1; 2023 constraints_idxs[total_counts] = is_indices[i]; 2024 constraints_data[total_counts] = 1.0; 2025 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2026 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2027 total_counts++; 2028 used_vertex = PETSC_TRUE; 2029 } 2030 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2031 k++; 2032 } 2033 } 2034 } 2035 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2036 n_vertices = total_counts; 2037 } 2038 2039 /* edges and faces */ 2040 total_counts_cc = total_counts; 2041 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2042 IS used_is; 2043 PetscBool idxs_copied = PETSC_FALSE; 2044 2045 if (ncc<n_ISForEdges) { 2046 used_is = ISForEdges[ncc]; 2047 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2048 } else { 2049 used_is = ISForFaces[ncc-n_ISForEdges]; 2050 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2051 } 2052 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2053 2054 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2055 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2056 /* change of basis should not be performed on local periodic nodes */ 2057 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2058 if (nnsp_has_cnst) { 2059 PetscScalar quad_value; 2060 2061 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2062 idxs_copied = PETSC_TRUE; 2063 2064 if (!pcbddc->use_nnsp_true) { 2065 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2066 } else { 2067 quad_value = 1.0; 2068 } 2069 for (j=0;j<size_of_constraint;j++) { 2070 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2071 } 2072 temp_constraints++; 2073 total_counts++; 2074 } 2075 for (k=0;k<nnsp_size;k++) { 2076 PetscReal real_value; 2077 PetscScalar *ptr_to_data; 2078 2079 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2080 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2081 for (j=0;j<size_of_constraint;j++) { 2082 ptr_to_data[j] = array[is_indices[j]]; 2083 } 2084 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2085 /* check if array is null on the connected component */ 2086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2087 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2088 if (real_value > 0.0) { /* keep indices and values */ 2089 temp_constraints++; 2090 total_counts++; 2091 if (!idxs_copied) { 2092 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2093 idxs_copied = PETSC_TRUE; 2094 } 2095 } 2096 } 2097 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2098 valid_constraints = temp_constraints; 2099 if (!pcbddc->use_nnsp_true && temp_constraints) { 2100 if (temp_constraints == 1) { /* just normalize the constraint */ 2101 PetscScalar norm,*ptr_to_data; 2102 2103 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2104 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2105 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2106 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2107 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2108 } else { /* perform SVD */ 2109 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2110 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2111 2112 #if defined(PETSC_MISSING_LAPACK_GESVD) 2113 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2114 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2115 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2116 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2117 from that computed using LAPACKgesvd 2118 -> This is due to a different computation of eigenvectors in LAPACKheev 2119 -> The quality of the POD-computed basis will be the same */ 2120 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2121 /* Store upper triangular part of correlation matrix */ 2122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2123 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2124 for (j=0;j<temp_constraints;j++) { 2125 for (k=0;k<j+1;k++) { 2126 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2127 } 2128 } 2129 /* compute eigenvalues and eigenvectors of correlation matrix */ 2130 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2131 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2132 #if !defined(PETSC_USE_COMPLEX) 2133 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2134 #else 2135 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2136 #endif 2137 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2138 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2139 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2140 j = 0; 2141 while (j < temp_constraints && singular_vals[j] < tol) j++; 2142 total_counts = total_counts-j; 2143 valid_constraints = temp_constraints-j; 2144 /* scale and copy POD basis into used quadrature memory */ 2145 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2146 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2147 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2148 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2149 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2151 if (j<temp_constraints) { 2152 PetscInt ii; 2153 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2154 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2155 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2156 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2157 for (k=0;k<temp_constraints-j;k++) { 2158 for (ii=0;ii<size_of_constraint;ii++) { 2159 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2160 } 2161 } 2162 } 2163 #else /* on missing GESVD */ 2164 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2165 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2167 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2168 #if !defined(PETSC_USE_COMPLEX) 2169 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2170 #else 2171 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2172 #endif 2173 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2174 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2175 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2176 k = temp_constraints; 2177 if (k > size_of_constraint) k = size_of_constraint; 2178 j = 0; 2179 while (j < k && singular_vals[k-j-1] < tol) j++; 2180 valid_constraints = k-j; 2181 total_counts = total_counts-temp_constraints+valid_constraints; 2182 #endif /* on missing GESVD */ 2183 } 2184 } 2185 /* update pointers information */ 2186 if (valid_constraints) { 2187 constraints_n[total_counts_cc] = valid_constraints; 2188 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2189 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2190 total_counts_cc++; 2191 /* set change_of_basis flag */ 2192 if (boolforchange) { 2193 for (j=valid_constraints;j>0;j--) { 2194 PetscBTSet(change_basis,total_counts-j); 2195 } 2196 } 2197 } 2198 } 2199 /* free workspace */ 2200 if (!skip_lapack) { 2201 ierr = PetscFree(work);CHKERRQ(ierr); 2202 #if defined(PETSC_USE_COMPLEX) 2203 ierr = PetscFree(rwork);CHKERRQ(ierr); 2204 #endif 2205 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2206 #if defined(PETSC_MISSING_LAPACK_GESVD) 2207 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2208 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2209 #endif 2210 } 2211 for (k=0;k<nnsp_size;k++) { 2212 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2213 } 2214 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2215 /* free index sets of faces, edges and vertices */ 2216 for (i=0;i<n_ISForFaces;i++) { 2217 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2218 } 2219 if (n_ISForFaces) { 2220 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2221 } 2222 for (i=0;i<n_ISForEdges;i++) { 2223 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2224 } 2225 if (n_ISForEdges) { 2226 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2227 } 2228 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2229 } else { 2230 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2231 2232 total_counts = 0; 2233 n_vertices = 0; 2234 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2235 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2236 } 2237 max_constraints = 0; 2238 total_counts_cc = 0; 2239 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2240 total_counts += pcbddc->adaptive_constraints_n[i]; 2241 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2242 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2243 } 2244 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2245 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2246 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2247 constraints_data = pcbddc->adaptive_constraints_data; 2248 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2249 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2250 total_counts_cc = 0; 2251 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2252 if (pcbddc->adaptive_constraints_n[i]) { 2253 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2254 } 2255 } 2256 #if 0 2257 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2258 for (i=0;i<total_counts_cc;i++) { 2259 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2260 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2261 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2262 printf(" %d",constraints_idxs[j]); 2263 } 2264 printf("\n"); 2265 printf("number of cc: %d\n",constraints_n[i]); 2266 } 2267 for (i=0;i<n_vertices;i++) { 2268 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2269 } 2270 for (i=0;i<sub_schurs->n_subs;i++) { 2271 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2272 } 2273 #endif 2274 2275 max_size_of_constraint = 0; 2276 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2277 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2278 /* Change of basis */ 2279 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2280 if (pcbddc->use_change_of_basis) { 2281 PetscInt cum = n_vertices; 2282 for (i=0;i<sub_schurs->n_subs;i++) { 2283 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2284 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2285 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2286 } 2287 } 2288 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2289 } 2290 } 2291 } 2292 pcbddc->local_primal_size = total_counts; 2293 2294 /* map constraints_idxs in boundary numbering */ 2295 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2296 if (i != constraints_idxs_ptr[total_counts_cc]) { 2297 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2298 } 2299 2300 /* Create constraint matrix */ 2301 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2302 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2303 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2304 /* array to compute a local numbering of constraints : vertices first then constraints */ 2305 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2306 2307 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2308 /* determine if a QR strategy is needed for change of basis */ 2309 qr_needed = PETSC_FALSE; 2310 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2311 total_primal_vertices=0; 2312 for (i=0;i<total_counts_cc;i++) { 2313 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2314 if (size_of_constraint == 1) { 2315 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2316 } else if (PetscBTLookup(change_basis,i)) { 2317 for (k=0;k<constraints_n[i];k++) { 2318 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2319 } 2320 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2321 PetscBTSet(qr_needed_idx,i); 2322 qr_needed = PETSC_TRUE; 2323 } 2324 } 2325 } 2326 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2327 pcbddc->n_vertices = total_primal_vertices; 2328 /* permute indices in order to have a sorted set of vertices */ 2329 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2330 2331 /* nonzero structure of constraint matrix */ 2332 /* and get reference dof for local constraints */ 2333 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2334 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 2335 2336 j = total_primal_vertices; 2337 total_counts = total_primal_vertices; 2338 for (i=n_vertices;i<total_counts_cc;i++) { 2339 if (!PetscBTLookup(change_basis,i)) { 2340 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2341 for (k=0;k<constraints_n[i];k++) { 2342 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2343 nnz[j+k] = size_of_constraint; 2344 } 2345 j += constraints_n[i]; 2346 } 2347 } 2348 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2349 ierr = PetscFree(nnz);CHKERRQ(ierr); 2350 2351 /* set values in constraint matrix */ 2352 for (i=0;i<total_primal_vertices;i++) { 2353 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2354 } 2355 total_counts = total_primal_vertices; 2356 for (i=n_vertices;i<total_counts_cc;i++) { 2357 if (!PetscBTLookup(change_basis,i)) { 2358 PetscInt *cols; 2359 2360 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2361 cols = constraints_idxs+constraints_idxs_ptr[i]; 2362 for (k=0;k<constraints_n[i];k++) { 2363 PetscInt row = total_counts+k; 2364 PetscScalar *vals; 2365 2366 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2367 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2368 } 2369 total_counts += constraints_n[i]; 2370 } 2371 } 2372 /* assembling */ 2373 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2374 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2375 2376 /* 2377 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2378 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2379 */ 2380 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2381 if (pcbddc->use_change_of_basis) { 2382 /* dual and primal dofs on a single cc */ 2383 PetscInt dual_dofs,primal_dofs; 2384 /* working stuff for GEQRF */ 2385 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2386 PetscBLASInt lqr_work; 2387 /* working stuff for UNGQR */ 2388 PetscScalar *gqr_work,lgqr_work_t; 2389 PetscBLASInt lgqr_work; 2390 /* working stuff for TRTRS */ 2391 PetscScalar *trs_rhs; 2392 PetscBLASInt Blas_NRHS; 2393 /* pointers for values insertion into change of basis matrix */ 2394 PetscInt *start_rows,*start_cols; 2395 PetscScalar *start_vals; 2396 /* working stuff for values insertion */ 2397 PetscBT is_primal; 2398 PetscInt *aux_primal_numbering_B; 2399 /* matrix sizes */ 2400 PetscInt global_size,local_size; 2401 /* temporary change of basis */ 2402 Mat localChangeOfBasisMatrix; 2403 /* extra space for debugging */ 2404 PetscScalar *dbg_work; 2405 2406 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2407 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2408 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2409 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2410 /* nonzeros for local mat */ 2411 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2412 for (i=0;i<pcis->n;i++) nnz[i]=1; 2413 for (i=n_vertices;i<total_counts_cc;i++) { 2414 if (PetscBTLookup(change_basis,i)) { 2415 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2416 if (PetscBTLookup(qr_needed_idx,i)) { 2417 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2418 } else { 2419 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2420 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2421 } 2422 } 2423 } 2424 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2425 ierr = PetscFree(nnz);CHKERRQ(ierr); 2426 /* Set initial identity in the matrix */ 2427 for (i=0;i<pcis->n;i++) { 2428 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2429 } 2430 2431 if (pcbddc->dbg_flag) { 2432 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2434 } 2435 2436 2437 /* Now we loop on the constraints which need a change of basis */ 2438 /* 2439 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2440 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2441 2442 Basic blocks of change of basis matrix T computed by 2443 2444 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2445 2446 | 1 0 ... 0 s_1/S | 2447 | 0 1 ... 0 s_2/S | 2448 | ... | 2449 | 0 ... 1 s_{n-1}/S | 2450 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2451 2452 with S = \sum_{i=1}^n s_i^2 2453 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2454 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2455 2456 - QR decomposition of constraints otherwise 2457 */ 2458 if (qr_needed) { 2459 /* space to store Q */ 2460 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2461 /* first we issue queries for optimal work */ 2462 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2463 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2464 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2465 lqr_work = -1; 2466 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2467 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2468 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2469 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2470 lgqr_work = -1; 2471 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2472 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2473 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2474 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2475 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2476 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2478 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2479 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2480 /* array to store scaling factors for reflectors */ 2481 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2482 /* array to store rhs and solution of triangular solver */ 2483 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2484 /* allocating workspace for check */ 2485 if (pcbddc->dbg_flag) { 2486 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2487 } 2488 } 2489 /* array to store whether a node is primal or not */ 2490 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2491 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2492 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->primal_indices_local_idxs,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2493 if (i != total_primal_vertices) { 2494 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2495 } 2496 for (i=0;i<total_primal_vertices;i++) { 2497 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2498 } 2499 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2500 2501 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2502 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2503 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2504 if (PetscBTLookup(change_basis,total_counts)) { 2505 /* get constraint info */ 2506 primal_dofs = constraints_n[total_counts]; 2507 dual_dofs = size_of_constraint-primal_dofs; 2508 2509 if (pcbddc->dbg_flag) { 2510 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2511 } 2512 2513 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2514 2515 /* copy quadrature constraints for change of basis check */ 2516 if (pcbddc->dbg_flag) { 2517 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2518 } 2519 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2520 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2521 2522 /* compute QR decomposition of constraints */ 2523 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2524 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2526 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2527 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2528 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2529 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2530 2531 /* explictly compute R^-T */ 2532 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2533 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2534 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2535 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2537 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2539 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2540 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2541 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2542 2543 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2546 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2547 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2548 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2549 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2550 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2551 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2552 2553 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2554 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2555 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2557 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2558 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2560 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2562 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2563 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2564 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2565 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2566 2567 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2568 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2569 /* insert cols for primal dofs */ 2570 for (j=0;j<primal_dofs;j++) { 2571 start_vals = &qr_basis[j*size_of_constraint]; 2572 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2573 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2574 } 2575 /* insert cols for dual dofs */ 2576 for (j=0,k=0;j<dual_dofs;k++) { 2577 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2578 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2579 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2580 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2581 j++; 2582 } 2583 } 2584 2585 /* check change of basis */ 2586 if (pcbddc->dbg_flag) { 2587 PetscInt ii,jj; 2588 PetscBool valid_qr=PETSC_TRUE; 2589 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2590 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2594 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2595 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2596 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2597 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2598 for (jj=0;jj<size_of_constraint;jj++) { 2599 for (ii=0;ii<primal_dofs;ii++) { 2600 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2601 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2602 } 2603 } 2604 if (!valid_qr) { 2605 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2606 for (jj=0;jj<size_of_constraint;jj++) { 2607 for (ii=0;ii<primal_dofs;ii++) { 2608 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2609 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2610 } 2611 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2612 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2613 } 2614 } 2615 } 2616 } else { 2617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2618 } 2619 } 2620 } else { /* simple transformation block */ 2621 PetscInt row,col; 2622 PetscScalar val,norm; 2623 2624 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2625 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2626 for (j=0;j<size_of_constraint;j++) { 2627 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2628 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2629 if (!PetscBTLookup(is_primal,row_B)) { 2630 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2631 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2632 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2633 } else { 2634 for (k=0;k<size_of_constraint;k++) { 2635 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2636 if (row != col) { 2637 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2638 } else { 2639 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2640 } 2641 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2642 } 2643 } 2644 } 2645 if (pcbddc->dbg_flag) { 2646 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2647 } 2648 } 2649 } else { 2650 if (pcbddc->dbg_flag) { 2651 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2652 } 2653 } 2654 } 2655 2656 /* free workspace */ 2657 if (qr_needed) { 2658 if (pcbddc->dbg_flag) { 2659 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2660 } 2661 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2662 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2663 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2664 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2665 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2666 } 2667 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2668 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2669 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2670 2671 /* assembling of global change of variable */ 2672 { 2673 Mat tmat; 2674 PetscInt bs; 2675 2676 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2677 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2678 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2679 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2680 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2681 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2682 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2683 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2684 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2685 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2686 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2687 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2688 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2689 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2690 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2691 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2692 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2693 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2694 } 2695 /* check */ 2696 if (pcbddc->dbg_flag) { 2697 PetscReal error; 2698 Vec x,x_change; 2699 2700 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2701 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2702 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2703 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2704 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2705 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2706 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2707 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2708 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2709 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2710 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2711 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2712 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2713 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2714 ierr = VecDestroy(&x);CHKERRQ(ierr); 2715 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2716 } 2717 2718 /* adapt sub_schurs computed (if any) */ 2719 if (pcbddc->use_deluxe_scaling) { 2720 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2721 if (sub_schurs->S_Ej_all) { 2722 Mat S_new,tmat; 2723 IS is_all_N; 2724 2725 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2726 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2727 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2728 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2729 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2730 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2731 sub_schurs->S_Ej_all = S_new; 2732 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2733 if (sub_schurs->sum_S_Ej_all) { 2734 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2735 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2736 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2737 sub_schurs->sum_S_Ej_all = S_new; 2738 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2739 } 2740 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2741 } 2742 } 2743 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2744 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2745 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2746 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2747 } 2748 2749 /* set up change of basis context */ 2750 if (pcbddc->ChangeOfBasisMatrix) { 2751 PCBDDCChange_ctx change_ctx; 2752 2753 if (!pcbddc->new_global_mat) { 2754 PetscInt global_size,local_size; 2755 2756 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2757 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2758 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2759 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2760 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2761 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2762 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2763 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2764 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2765 } else { 2766 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2767 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2768 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2769 } 2770 if (!pcbddc->user_ChangeOfBasisMatrix) { 2771 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2772 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2773 } else { 2774 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2775 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2776 } 2777 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2778 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2779 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2780 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2781 } 2782 2783 /* check if a new primal space has been introduced */ 2784 pcbddc->new_primal_space_local = PETSC_TRUE; 2785 if (olocal_primal_size == pcbddc->local_primal_size) { 2786 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2787 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2788 } 2789 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2790 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2791 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2792 2793 /* flush dbg viewer */ 2794 if (pcbddc->dbg_flag) { 2795 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2796 } 2797 2798 /* free workspace */ 2799 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2800 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2801 if (!pcbddc->adaptive_selection) { 2802 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2803 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2804 } else { 2805 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2806 pcbddc->adaptive_constraints_idxs_ptr, 2807 pcbddc->adaptive_constraints_data_ptr, 2808 pcbddc->adaptive_constraints_idxs, 2809 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2810 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2811 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2812 } 2813 PetscFunctionReturn(0); 2814 } 2815 2816 #undef __FUNCT__ 2817 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2818 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2819 { 2820 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2821 PC_IS *pcis = (PC_IS*)pc->data; 2822 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2823 PetscInt ierr,i,vertex_size,N; 2824 PetscViewer viewer=pcbddc->dbg_viewer; 2825 2826 PetscFunctionBegin; 2827 /* Reset previously computed graph */ 2828 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2829 /* Init local Graph struct */ 2830 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2831 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2832 2833 /* Check validity of the csr graph passed in by the user */ 2834 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2835 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2836 } 2837 2838 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2839 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2840 PetscInt *xadj,*adjncy; 2841 PetscInt nvtxs; 2842 PetscBool flg_row=PETSC_FALSE; 2843 2844 if (pcbddc->use_local_adj) { 2845 2846 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2847 if (flg_row) { 2848 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2849 pcbddc->computed_rowadj = PETSC_TRUE; 2850 } 2851 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2852 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2853 IS is_dummy; 2854 ISLocalToGlobalMapping l2gmap_dummy; 2855 PetscInt j,sum; 2856 PetscInt *cxadj,*cadjncy; 2857 const PetscInt *idxs; 2858 PCBDDCGraph graph; 2859 PetscBT is_on_boundary; 2860 2861 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2862 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2863 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2864 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2865 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 2866 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2867 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2868 if (flg_row) { 2869 graph->xadj = xadj; 2870 graph->adjncy = adjncy; 2871 } 2872 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2873 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2874 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2875 2876 if (pcbddc->dbg_flag) { 2877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 2878 for (i=0;i<graph->ncc;i++) { 2879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2880 } 2881 } 2882 2883 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2884 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2885 for (i=0;i<pcis->n_B;i++) { 2886 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2887 } 2888 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2889 2890 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2891 sum = 0; 2892 for (i=0;i<graph->ncc;i++) { 2893 PetscInt sizecc = 0; 2894 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2895 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2896 sizecc++; 2897 } 2898 } 2899 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2900 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2901 cxadj[graph->queue[j]] = sizecc; 2902 } 2903 } 2904 sum += sizecc*sizecc; 2905 } 2906 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2907 sum = 0; 2908 for (i=0;i<pcis->n;i++) { 2909 PetscInt temp = cxadj[i]; 2910 cxadj[i] = sum; 2911 sum += temp; 2912 } 2913 cxadj[pcis->n] = sum; 2914 for (i=0;i<graph->ncc;i++) { 2915 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2916 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2917 PetscInt k,sizecc = 0; 2918 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2919 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2920 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2921 sizecc++; 2922 } 2923 } 2924 } 2925 } 2926 } 2927 if (sum) { 2928 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2929 } else { 2930 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2931 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2932 } 2933 graph->xadj = 0; 2934 graph->adjncy = 0; 2935 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2936 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2937 } 2938 } 2939 if (pcbddc->dbg_flag) { 2940 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2941 } 2942 2943 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2944 vertex_size = 1; 2945 if (pcbddc->user_provided_isfordofs) { 2946 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2947 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2948 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2949 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2950 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2951 } 2952 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2953 pcbddc->n_ISForDofs = 0; 2954 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2955 } 2956 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2957 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2958 } else { 2959 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2960 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2961 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2962 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2963 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2964 } 2965 } 2966 } 2967 2968 /* Setup of Graph */ 2969 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2970 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2971 } 2972 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2973 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2974 } 2975 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 2976 2977 /* Graph's connected components analysis */ 2978 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2979 2980 /* print some info to stdout */ 2981 if (pcbddc->dbg_flag) { 2982 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 2983 } 2984 2985 /* mark topography has done */ 2986 pcbddc->recompute_topography = PETSC_FALSE; 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "PCBDDCSubsetNumbering" 2992 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2993 { 2994 Vec local_vec,global_vec; 2995 IS seqis,paris; 2996 VecScatter scatter_ctx; 2997 PetscScalar *array; 2998 PetscInt *temp_global_dofs; 2999 PetscScalar globalsum; 3000 PetscInt i,j,s; 3001 PetscInt nlocals,first_index,old_index,max_local,max_global; 3002 PetscMPIInt rank_prec_comm,size_prec_comm; 3003 PetscInt *dof_sizes,*dof_displs; 3004 PetscBool first_found; 3005 PetscErrorCode ierr; 3006 3007 PetscFunctionBegin; 3008 /* mpi buffers */ 3009 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3010 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3011 j = ( !rank_prec_comm ? size_prec_comm : 0); 3012 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3013 /* get maximum size of subset */ 3014 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3015 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3016 max_local = 0; 3017 for (i=0;i<n_local_dofs;i++) { 3018 if (max_local < temp_global_dofs[i] ) { 3019 max_local = temp_global_dofs[i]; 3020 } 3021 } 3022 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3023 max_global++; 3024 max_local = 0; 3025 for (i=0;i<n_local_dofs;i++) { 3026 if (max_local < local_dofs[i] ) { 3027 max_local = local_dofs[i]; 3028 } 3029 } 3030 max_local++; 3031 /* allocate workspace */ 3032 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3033 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3034 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3035 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3036 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3037 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3038 /* create scatter */ 3039 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3040 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3041 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3042 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3043 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3044 /* init array */ 3045 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3046 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3047 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3048 if (local_dofs_mult) { 3049 for (i=0;i<n_local_dofs;i++) { 3050 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3051 } 3052 } else { 3053 for (i=0;i<n_local_dofs;i++) { 3054 array[local_dofs[i]]=1.0; 3055 } 3056 } 3057 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3058 /* scatter into global vec and get total number of global dofs */ 3059 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3060 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3061 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3062 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3063 /* Fill global_vec with cumulative function for global numbering */ 3064 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3065 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3066 nlocals = 0; 3067 first_index = -1; 3068 first_found = PETSC_FALSE; 3069 for (i=0;i<s;i++) { 3070 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3071 first_found = PETSC_TRUE; 3072 first_index = i; 3073 } 3074 nlocals += (PetscInt)PetscRealPart(array[i]); 3075 } 3076 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3077 if (!rank_prec_comm) { 3078 dof_displs[0]=0; 3079 for (i=1;i<size_prec_comm;i++) { 3080 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3081 } 3082 } 3083 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3084 if (first_found) { 3085 array[first_index] += (PetscScalar)nlocals; 3086 old_index = first_index; 3087 for (i=first_index+1;i<s;i++) { 3088 if (PetscRealPart(array[i]) > 0.1) { 3089 array[i] += array[old_index]; 3090 old_index = i; 3091 } 3092 } 3093 } 3094 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3095 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3096 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3097 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3098 /* get global ordering of local dofs */ 3099 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3100 if (local_dofs_mult) { 3101 for (i=0;i<n_local_dofs;i++) { 3102 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3103 } 3104 } else { 3105 for (i=0;i<n_local_dofs;i++) { 3106 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3107 } 3108 } 3109 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3110 /* free workspace */ 3111 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3112 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3113 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3114 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3115 /* return pointer to global ordering of local dofs */ 3116 *global_numbering_subset = temp_global_dofs; 3117 PetscFunctionReturn(0); 3118 } 3119 3120 #undef __FUNCT__ 3121 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3122 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3123 { 3124 PetscInt i,j; 3125 PetscScalar *alphas; 3126 PetscErrorCode ierr; 3127 3128 PetscFunctionBegin; 3129 /* this implements stabilized Gram-Schmidt */ 3130 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3131 for (i=0;i<n;i++) { 3132 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3133 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3134 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3135 } 3136 ierr = PetscFree(alphas);CHKERRQ(ierr); 3137 PetscFunctionReturn(0); 3138 } 3139 3140 #undef __FUNCT__ 3141 #define __FUNCT__ "MatISGetSubassemblingPattern" 3142 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3143 { 3144 Mat subdomain_adj; 3145 IS new_ranks,ranks_send_to; 3146 MatPartitioning partitioner; 3147 Mat_IS *matis; 3148 PetscInt n_neighs,*neighs,*n_shared,**shared; 3149 PetscInt prank; 3150 PetscMPIInt size,rank,color; 3151 PetscInt *xadj,*adjncy,*oldranks; 3152 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3153 PetscInt i,local_size,threshold=0; 3154 PetscErrorCode ierr; 3155 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3156 PetscSubcomm subcomm; 3157 3158 PetscFunctionBegin; 3159 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3160 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3161 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3162 3163 /* Get info on mapping */ 3164 matis = (Mat_IS*)(mat->data); 3165 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3166 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3167 3168 /* build local CSR graph of subdomains' connectivity */ 3169 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3170 xadj[0] = 0; 3171 xadj[1] = PetscMax(n_neighs-1,0); 3172 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3173 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3174 3175 if (threshold) { 3176 PetscInt xadj_count = 0; 3177 for (i=1;i<n_neighs;i++) { 3178 if (n_shared[i] > threshold) { 3179 adjncy[xadj_count] = neighs[i]; 3180 adjncy_wgt[xadj_count] = n_shared[i]; 3181 xadj_count++; 3182 } 3183 } 3184 xadj[1] = xadj_count; 3185 } else { 3186 if (xadj[1]) { 3187 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3188 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3189 } 3190 } 3191 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3192 if (use_square) { 3193 for (i=0;i<xadj[1];i++) { 3194 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3195 } 3196 } 3197 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3198 3199 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3200 3201 /* 3202 Restrict work on active processes only. 3203 */ 3204 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3205 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3206 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3207 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3208 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3209 if (color) { 3210 ierr = PetscFree(xadj);CHKERRQ(ierr); 3211 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3212 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3213 } else { 3214 PetscInt coarsening_ratio; 3215 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3216 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3217 prank = rank; 3218 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3219 /* 3220 for (i=0;i<size;i++) { 3221 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3222 } 3223 */ 3224 for (i=0;i<xadj[1];i++) { 3225 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3226 } 3227 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3228 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3229 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3230 3231 /* Partition */ 3232 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3233 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3234 if (use_vwgt) { 3235 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3236 v_wgt[0] = local_size; 3237 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3238 } 3239 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3240 coarsening_ratio = size/n_subdomains; 3241 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3242 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3243 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3244 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3245 3246 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3247 if (contiguous) { 3248 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3249 } else { 3250 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3251 } 3252 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3253 /* clean up */ 3254 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3255 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3256 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3257 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3258 } 3259 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3260 3261 /* assemble parallel IS for sends */ 3262 i = 1; 3263 if (color) i=0; 3264 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3265 3266 /* get back IS */ 3267 *is_sends = ranks_send_to; 3268 PetscFunctionReturn(0); 3269 } 3270 3271 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3272 3273 #undef __FUNCT__ 3274 #define __FUNCT__ "MatISSubassemble" 3275 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3276 { 3277 Mat local_mat; 3278 Mat_IS *matis; 3279 IS is_sends_internal; 3280 PetscInt rows,cols,new_local_rows; 3281 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3282 PetscBool ismatis,isdense,newisdense,destroy_mat; 3283 ISLocalToGlobalMapping l2gmap; 3284 PetscInt* l2gmap_indices; 3285 const PetscInt* is_indices; 3286 MatType new_local_type; 3287 /* buffers */ 3288 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3289 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3290 PetscInt *recv_buffer_idxs_local; 3291 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3292 /* MPI */ 3293 MPI_Comm comm,comm_n; 3294 PetscSubcomm subcomm; 3295 PetscMPIInt n_sends,n_recvs,commsize; 3296 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3297 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3298 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3299 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3300 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3301 PetscErrorCode ierr; 3302 3303 PetscFunctionBegin; 3304 /* TODO: add missing checks */ 3305 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3306 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3307 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3308 PetscValidLogicalCollectiveInt(mat,nis,7); 3309 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3310 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3311 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3312 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3313 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3314 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3315 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3316 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3317 PetscInt mrows,mcols,mnrows,mncols; 3318 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3319 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3320 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3321 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3322 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3323 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3324 } 3325 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3326 PetscValidLogicalCollectiveInt(mat,bs,0); 3327 /* prepare IS for sending if not provided */ 3328 if (!is_sends) { 3329 PetscBool pcontig = PETSC_TRUE; 3330 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3331 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3332 } else { 3333 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3334 is_sends_internal = is_sends; 3335 } 3336 3337 /* get pointer of MATIS data */ 3338 matis = (Mat_IS*)mat->data; 3339 3340 /* get comm */ 3341 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3342 3343 /* compute number of sends */ 3344 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3345 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3346 3347 /* compute number of receives */ 3348 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3349 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3350 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3351 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3352 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3353 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3354 ierr = PetscFree(iflags);CHKERRQ(ierr); 3355 3356 /* restrict comm if requested */ 3357 subcomm = 0; 3358 destroy_mat = PETSC_FALSE; 3359 if (restrict_comm) { 3360 PetscMPIInt color,subcommsize; 3361 3362 color = 0; 3363 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3364 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3365 subcommsize = commsize - subcommsize; 3366 /* check if reuse has been requested */ 3367 if (reuse == MAT_REUSE_MATRIX) { 3368 if (*mat_n) { 3369 PetscMPIInt subcommsize2; 3370 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3371 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3372 comm_n = PetscObjectComm((PetscObject)*mat_n); 3373 } else { 3374 comm_n = PETSC_COMM_SELF; 3375 } 3376 } else { /* MAT_INITIAL_MATRIX */ 3377 PetscMPIInt rank; 3378 3379 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3380 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3381 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3382 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3383 comm_n = PetscSubcommChild(subcomm); 3384 } 3385 /* flag to destroy *mat_n if not significative */ 3386 if (color) destroy_mat = PETSC_TRUE; 3387 } else { 3388 comm_n = comm; 3389 } 3390 3391 /* prepare send/receive buffers */ 3392 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3393 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3394 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3395 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3396 if (nis) { 3397 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3398 } 3399 3400 /* Get data from local matrices */ 3401 if (!isdense) { 3402 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3403 /* TODO: See below some guidelines on how to prepare the local buffers */ 3404 /* 3405 send_buffer_vals should contain the raw values of the local matrix 3406 send_buffer_idxs should contain: 3407 - MatType_PRIVATE type 3408 - PetscInt size_of_l2gmap 3409 - PetscInt global_row_indices[size_of_l2gmap] 3410 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3411 */ 3412 } else { 3413 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3414 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3415 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3416 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3417 send_buffer_idxs[1] = i; 3418 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3419 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3420 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3421 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3422 for (i=0;i<n_sends;i++) { 3423 ilengths_vals[is_indices[i]] = len*len; 3424 ilengths_idxs[is_indices[i]] = len+2; 3425 } 3426 } 3427 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3428 /* additional is (if any) */ 3429 if (nis) { 3430 PetscMPIInt psum; 3431 PetscInt j; 3432 for (j=0,psum=0;j<nis;j++) { 3433 PetscInt plen; 3434 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3435 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3436 psum += len+1; /* indices + lenght */ 3437 } 3438 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3439 for (j=0,psum=0;j<nis;j++) { 3440 PetscInt plen; 3441 const PetscInt *is_array_idxs; 3442 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3443 send_buffer_idxs_is[psum] = plen; 3444 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3445 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3446 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3447 psum += plen+1; /* indices + lenght */ 3448 } 3449 for (i=0;i<n_sends;i++) { 3450 ilengths_idxs_is[is_indices[i]] = psum; 3451 } 3452 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3453 } 3454 3455 buf_size_idxs = 0; 3456 buf_size_vals = 0; 3457 buf_size_idxs_is = 0; 3458 for (i=0;i<n_recvs;i++) { 3459 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3460 buf_size_vals += (PetscInt)olengths_vals[i]; 3461 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3462 } 3463 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3464 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3465 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3466 3467 /* get new tags for clean communications */ 3468 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3469 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3470 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3471 3472 /* allocate for requests */ 3473 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3474 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3475 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3476 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3477 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3479 3480 /* communications */ 3481 ptr_idxs = recv_buffer_idxs; 3482 ptr_vals = recv_buffer_vals; 3483 ptr_idxs_is = recv_buffer_idxs_is; 3484 for (i=0;i<n_recvs;i++) { 3485 source_dest = onodes[i]; 3486 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3487 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3488 ptr_idxs += olengths_idxs[i]; 3489 ptr_vals += olengths_vals[i]; 3490 if (nis) { 3491 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3492 ptr_idxs_is += olengths_idxs_is[i]; 3493 } 3494 } 3495 for (i=0;i<n_sends;i++) { 3496 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3497 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3498 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3499 if (nis) { 3500 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3501 } 3502 } 3503 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3504 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3505 3506 /* assemble new l2g map */ 3507 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3508 ptr_idxs = recv_buffer_idxs; 3509 new_local_rows = 0; 3510 for (i=0;i<n_recvs;i++) { 3511 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3512 ptr_idxs += olengths_idxs[i]; 3513 } 3514 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3515 ptr_idxs = recv_buffer_idxs; 3516 new_local_rows = 0; 3517 for (i=0;i<n_recvs;i++) { 3518 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3519 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3520 ptr_idxs += olengths_idxs[i]; 3521 } 3522 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3523 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3524 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3525 3526 /* infer new local matrix type from received local matrices type */ 3527 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3528 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3529 if (n_recvs) { 3530 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3531 ptr_idxs = recv_buffer_idxs; 3532 for (i=0;i<n_recvs;i++) { 3533 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3534 new_local_type_private = MATAIJ_PRIVATE; 3535 break; 3536 } 3537 ptr_idxs += olengths_idxs[i]; 3538 } 3539 switch (new_local_type_private) { 3540 case MATDENSE_PRIVATE: 3541 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3542 new_local_type = MATSEQAIJ; 3543 bs = 1; 3544 } else { /* if I receive only 1 dense matrix */ 3545 new_local_type = MATSEQDENSE; 3546 bs = 1; 3547 } 3548 break; 3549 case MATAIJ_PRIVATE: 3550 new_local_type = MATSEQAIJ; 3551 bs = 1; 3552 break; 3553 case MATBAIJ_PRIVATE: 3554 new_local_type = MATSEQBAIJ; 3555 break; 3556 case MATSBAIJ_PRIVATE: 3557 new_local_type = MATSEQSBAIJ; 3558 break; 3559 default: 3560 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3561 break; 3562 } 3563 } else { /* by default, new_local_type is seqdense */ 3564 new_local_type = MATSEQDENSE; 3565 bs = 1; 3566 } 3567 3568 /* create MATIS object if needed */ 3569 if (reuse == MAT_INITIAL_MATRIX) { 3570 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3571 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3572 } else { 3573 /* it also destroys the local matrices */ 3574 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3575 } 3576 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3577 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3578 3579 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3580 3581 /* Global to local map of received indices */ 3582 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3583 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3584 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3585 3586 /* restore attributes -> type of incoming data and its size */ 3587 buf_size_idxs = 0; 3588 for (i=0;i<n_recvs;i++) { 3589 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3590 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3591 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3592 } 3593 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3594 3595 /* set preallocation */ 3596 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3597 if (!newisdense) { 3598 PetscInt *new_local_nnz=0; 3599 3600 ptr_vals = recv_buffer_vals; 3601 ptr_idxs = recv_buffer_idxs_local; 3602 if (n_recvs) { 3603 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3604 } 3605 for (i=0;i<n_recvs;i++) { 3606 PetscInt j; 3607 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3608 for (j=0;j<*(ptr_idxs+1);j++) { 3609 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3610 } 3611 } else { 3612 /* TODO */ 3613 } 3614 ptr_idxs += olengths_idxs[i]; 3615 } 3616 if (new_local_nnz) { 3617 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3618 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3619 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3620 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3621 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3622 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3623 } else { 3624 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3625 } 3626 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3627 } else { 3628 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3629 } 3630 3631 /* set values */ 3632 ptr_vals = recv_buffer_vals; 3633 ptr_idxs = recv_buffer_idxs_local; 3634 for (i=0;i<n_recvs;i++) { 3635 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3636 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3637 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3638 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3639 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3640 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3641 } else { 3642 /* TODO */ 3643 } 3644 ptr_idxs += olengths_idxs[i]; 3645 ptr_vals += olengths_vals[i]; 3646 } 3647 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3648 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3649 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3650 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3651 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3652 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3653 3654 #if 0 3655 if (!restrict_comm) { /* check */ 3656 Vec lvec,rvec; 3657 PetscReal infty_error; 3658 3659 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3660 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3661 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3662 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3663 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3664 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3665 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3666 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3667 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3668 } 3669 #endif 3670 3671 /* assemble new additional is (if any) */ 3672 if (nis) { 3673 PetscInt **temp_idxs,*count_is,j,psum; 3674 3675 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3676 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3677 ptr_idxs = recv_buffer_idxs_is; 3678 psum = 0; 3679 for (i=0;i<n_recvs;i++) { 3680 for (j=0;j<nis;j++) { 3681 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3682 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3683 psum += plen; 3684 ptr_idxs += plen+1; /* shift pointer to received data */ 3685 } 3686 } 3687 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3688 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3689 for (i=1;i<nis;i++) { 3690 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3691 } 3692 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3693 ptr_idxs = recv_buffer_idxs_is; 3694 for (i=0;i<n_recvs;i++) { 3695 for (j=0;j<nis;j++) { 3696 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3697 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3698 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3699 ptr_idxs += plen+1; /* shift pointer to received data */ 3700 } 3701 } 3702 for (i=0;i<nis;i++) { 3703 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3704 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3705 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3706 } 3707 ierr = PetscFree(count_is);CHKERRQ(ierr); 3708 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3709 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3710 } 3711 /* free workspace */ 3712 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3713 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3714 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3715 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3716 if (isdense) { 3717 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3718 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3719 } else { 3720 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3721 } 3722 if (nis) { 3723 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3724 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3725 } 3726 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3727 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3728 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3729 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3730 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3731 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3732 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3733 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3734 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3735 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3736 ierr = PetscFree(onodes);CHKERRQ(ierr); 3737 if (nis) { 3738 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3739 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3740 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3741 } 3742 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3743 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3744 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3745 for (i=0;i<nis;i++) { 3746 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3747 } 3748 } 3749 PetscFunctionReturn(0); 3750 } 3751 3752 /* temporary hack into ksp private data structure */ 3753 #include <petsc/private/kspimpl.h> 3754 3755 #undef __FUNCT__ 3756 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3757 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3758 { 3759 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3760 PC_IS *pcis = (PC_IS*)pc->data; 3761 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3762 MatNullSpace CoarseNullSpace=NULL; 3763 ISLocalToGlobalMapping coarse_islg; 3764 IS coarse_is,*isarray; 3765 PetscInt i,im_active=-1,active_procs=-1; 3766 PetscInt nis,nisdofs,nisneu; 3767 PC pc_temp; 3768 PCType coarse_pc_type; 3769 KSPType coarse_ksp_type; 3770 PetscBool multilevel_requested,multilevel_allowed; 3771 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3772 Mat t_coarse_mat_is; 3773 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3774 PetscMPIInt all_procs; 3775 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3776 PetscBool compute_vecs = PETSC_FALSE; 3777 PetscScalar *array; 3778 PetscErrorCode ierr; 3779 3780 PetscFunctionBegin; 3781 /* Assign global numbering to coarse dofs */ 3782 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3783 PetscInt ocoarse_size; 3784 compute_vecs = PETSC_TRUE; 3785 ocoarse_size = pcbddc->coarse_size; 3786 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3787 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3788 /* see if we can avoid some work */ 3789 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3790 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3791 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3792 coarse_reuse = PETSC_FALSE; 3793 } else { /* we can safely reuse already computed coarse matrix */ 3794 coarse_reuse = PETSC_TRUE; 3795 } 3796 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3797 coarse_reuse = PETSC_FALSE; 3798 } 3799 /* reset any subassembling information */ 3800 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3801 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3802 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3803 coarse_reuse = PETSC_TRUE; 3804 } 3805 3806 /* count "active" (i.e. with positive local size) and "void" processes */ 3807 im_active = !!(pcis->n); 3808 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3809 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3810 void_procs = all_procs-active_procs; 3811 csin_type_simple = PETSC_TRUE; 3812 redist = PETSC_FALSE; 3813 if (pcbddc->current_level && void_procs) { 3814 csin_ml = PETSC_TRUE; 3815 ncoarse_ml = void_procs; 3816 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3817 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3818 csin_ds = PETSC_TRUE; 3819 ncoarse_ds = pcbddc->redistribute_coarse; 3820 redist = PETSC_TRUE; 3821 } else { 3822 csin_ds = PETSC_TRUE; 3823 ncoarse_ds = active_procs; 3824 redist = PETSC_TRUE; 3825 } 3826 } else { 3827 csin_ml = PETSC_FALSE; 3828 ncoarse_ml = all_procs; 3829 if (void_procs) { 3830 csin_ds = PETSC_TRUE; 3831 ncoarse_ds = void_procs; 3832 csin_type_simple = PETSC_FALSE; 3833 } else { 3834 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3835 csin_ds = PETSC_TRUE; 3836 ncoarse_ds = pcbddc->redistribute_coarse; 3837 redist = PETSC_TRUE; 3838 } else { 3839 csin_ds = PETSC_FALSE; 3840 ncoarse_ds = all_procs; 3841 } 3842 } 3843 } 3844 3845 /* 3846 test if we can go multilevel: three conditions must be satisfied: 3847 - we have not exceeded the number of levels requested 3848 - we can actually subassemble the active processes 3849 - we can find a suitable number of MPI processes where we can place the subassembled problem 3850 */ 3851 multilevel_allowed = PETSC_FALSE; 3852 multilevel_requested = PETSC_FALSE; 3853 if (pcbddc->current_level < pcbddc->max_levels) { 3854 multilevel_requested = PETSC_TRUE; 3855 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3856 multilevel_allowed = PETSC_FALSE; 3857 } else { 3858 multilevel_allowed = PETSC_TRUE; 3859 } 3860 } 3861 /* determine number of process partecipating to coarse solver */ 3862 if (multilevel_allowed) { 3863 ncoarse = ncoarse_ml; 3864 csin = csin_ml; 3865 redist = PETSC_FALSE; 3866 } else { 3867 ncoarse = ncoarse_ds; 3868 csin = csin_ds; 3869 } 3870 3871 /* creates temporary l2gmap and IS for coarse indexes */ 3872 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3873 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3874 3875 /* creates temporary MATIS object for coarse matrix */ 3876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3877 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3878 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3879 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3880 #if 0 3881 { 3882 PetscViewer viewer; 3883 char filename[256]; 3884 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3885 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3886 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3887 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3888 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3889 } 3890 #endif 3891 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3892 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3893 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3894 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3895 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3896 3897 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3898 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3899 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3900 const PetscInt *idxs; 3901 ISLocalToGlobalMapping tmap; 3902 3903 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3904 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3905 /* allocate space for temporary storage */ 3906 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3907 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3908 /* allocate for IS array */ 3909 nisdofs = pcbddc->n_ISForDofsLocal; 3910 nisneu = !!pcbddc->NeumannBoundariesLocal; 3911 nis = nisdofs + nisneu; 3912 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3913 /* dofs splitting */ 3914 for (i=0;i<nisdofs;i++) { 3915 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3916 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3917 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3918 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3919 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3920 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3921 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3922 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3923 } 3924 /* neumann boundaries */ 3925 if (pcbddc->NeumannBoundariesLocal) { 3926 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3927 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3928 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3929 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3930 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3931 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3932 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3933 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3934 } 3935 /* free memory */ 3936 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3937 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3938 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3939 } else { 3940 nis = 0; 3941 nisdofs = 0; 3942 nisneu = 0; 3943 isarray = NULL; 3944 } 3945 /* destroy no longer needed map */ 3946 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3947 3948 /* restrict on coarse candidates (if needed) */ 3949 coarse_mat_is = NULL; 3950 if (csin) { 3951 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3952 if (redist) { 3953 PetscMPIInt rank; 3954 PetscInt spc,n_spc_p1,dest[1],destsize; 3955 3956 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3957 spc = active_procs/ncoarse; 3958 n_spc_p1 = active_procs%ncoarse; 3959 if (im_active) { 3960 destsize = 1; 3961 if (rank > n_spc_p1*(spc+1)-1) { 3962 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3963 } else { 3964 dest[0] = rank/(spc+1); 3965 } 3966 } else { 3967 destsize = 0; 3968 } 3969 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3970 } else if (csin_type_simple) { 3971 PetscMPIInt rank; 3972 PetscInt issize,isidx; 3973 3974 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3975 if (im_active) { 3976 issize = 1; 3977 isidx = (PetscInt)rank; 3978 } else { 3979 issize = 0; 3980 isidx = -1; 3981 } 3982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3983 } else { /* get a suitable subassembling pattern from MATIS code */ 3984 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3985 } 3986 3987 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 3988 if (!redist || ncoarse <= void_procs) { 3989 PetscInt ncoarse_cand,tissize,*nisindices; 3990 PetscInt *coarse_candidates; 3991 const PetscInt* tisindices; 3992 3993 /* get coarse candidates' ranks in pc communicator */ 3994 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 3995 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3996 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 3997 if (!coarse_candidates[i]) { 3998 coarse_candidates[ncoarse_cand++]=i; 3999 } 4000 } 4001 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4002 4003 4004 if (pcbddc->dbg_flag) { 4005 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4006 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4007 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4008 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4009 for (i=0;i<ncoarse_cand;i++) { 4010 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4011 } 4012 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4013 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4014 } 4015 /* shift the pattern on coarse candidates */ 4016 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4017 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4018 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4019 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4020 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4021 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4022 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4023 } 4024 if (pcbddc->dbg_flag) { 4025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4026 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4027 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4028 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4029 } 4030 } 4031 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4032 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4033 } else { 4034 if (pcbddc->dbg_flag) { 4035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4036 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4037 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4038 } 4039 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4040 coarse_mat_is = t_coarse_mat_is; 4041 } 4042 4043 /* create local to global scatters for coarse problem */ 4044 if (compute_vecs) { 4045 PetscInt lrows; 4046 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4047 if (coarse_mat_is) { 4048 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4049 } else { 4050 lrows = 0; 4051 } 4052 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4053 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4054 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4055 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4056 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4057 } 4058 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4059 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4060 4061 /* set defaults for coarse KSP and PC */ 4062 if (multilevel_allowed) { 4063 coarse_ksp_type = KSPRICHARDSON; 4064 coarse_pc_type = PCBDDC; 4065 } else { 4066 coarse_ksp_type = KSPPREONLY; 4067 coarse_pc_type = PCREDUNDANT; 4068 } 4069 4070 /* print some info if requested */ 4071 if (pcbddc->dbg_flag) { 4072 if (!multilevel_allowed) { 4073 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4074 if (multilevel_requested) { 4075 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4076 } else if (pcbddc->max_levels) { 4077 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4078 } 4079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4080 } 4081 } 4082 4083 /* create the coarse KSP object only once with defaults */ 4084 if (coarse_mat_is) { 4085 MatReuse coarse_mat_reuse; 4086 PetscViewer dbg_viewer = NULL; 4087 if (pcbddc->dbg_flag) { 4088 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4089 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4090 } 4091 if (!pcbddc->coarse_ksp) { 4092 char prefix[256],str_level[16]; 4093 size_t len; 4094 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4095 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4096 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4097 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4098 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4099 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4100 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4101 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4102 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4103 /* prefix */ 4104 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4105 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4106 if (!pcbddc->current_level) { 4107 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4108 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4109 } else { 4110 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4111 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4112 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4113 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4114 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4115 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4116 } 4117 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4118 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4119 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4120 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4121 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4122 /* allow user customization */ 4123 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4124 } 4125 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4126 if (nisdofs) { 4127 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4128 for (i=0;i<nisdofs;i++) { 4129 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4130 } 4131 } 4132 if (nisneu) { 4133 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4134 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4135 } 4136 4137 /* get some info after set from options */ 4138 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4139 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4140 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4141 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4142 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4143 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4144 isbddc = PETSC_FALSE; 4145 } 4146 if (isredundant) { 4147 KSP inner_ksp; 4148 PC inner_pc; 4149 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4150 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4151 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4152 } 4153 4154 /* assemble coarse matrix */ 4155 if (coarse_reuse) { 4156 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4157 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4158 coarse_mat_reuse = MAT_REUSE_MATRIX; 4159 } else { 4160 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4161 } 4162 if (isbddc || isnn) { 4163 if (pcbddc->coarsening_ratio > 1) { 4164 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4165 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4166 if (pcbddc->dbg_flag) { 4167 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4168 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4169 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4170 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4171 } 4172 } 4173 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4174 } else { 4175 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4176 coarse_mat = coarse_mat_is; 4177 } 4178 } else { 4179 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4180 } 4181 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4182 4183 /* propagate symmetry info to coarse matrix */ 4184 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4185 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4186 4187 /* set operators */ 4188 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4189 if (pcbddc->dbg_flag) { 4190 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4191 } 4192 } else { /* processes non partecipating to coarse solver (if any) */ 4193 coarse_mat = 0; 4194 } 4195 ierr = PetscFree(isarray);CHKERRQ(ierr); 4196 #if 0 4197 { 4198 PetscViewer viewer; 4199 char filename[256]; 4200 sprintf(filename,"coarse_mat.m"); 4201 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4202 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4203 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4204 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4205 } 4206 #endif 4207 4208 /* Compute coarse null space (special handling by BDDC only) */ 4209 if (pcbddc->NullSpace) { 4210 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4211 } 4212 4213 if (pcbddc->coarse_ksp) { 4214 Vec crhs,csol; 4215 PetscBool ispreonly; 4216 if (CoarseNullSpace) { 4217 if (isbddc) { 4218 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4219 } else { 4220 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4221 } 4222 } 4223 /* setup coarse ksp */ 4224 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4225 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4226 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4227 /* hack */ 4228 if (!csol) { 4229 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4230 } 4231 if (!crhs) { 4232 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4233 } 4234 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4235 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4236 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4237 KSP check_ksp; 4238 KSPType check_ksp_type; 4239 PC check_pc; 4240 Vec check_vec,coarse_vec; 4241 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4242 PetscInt its; 4243 PetscBool compute_eigs; 4244 PetscReal *eigs_r,*eigs_c; 4245 PetscInt neigs; 4246 const char *prefix; 4247 4248 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4249 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4250 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4251 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4252 if (ispreonly) { 4253 check_ksp_type = KSPPREONLY; 4254 compute_eigs = PETSC_FALSE; 4255 } else { 4256 check_ksp_type = KSPGMRES; 4257 compute_eigs = PETSC_TRUE; 4258 } 4259 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4260 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4261 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4262 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4263 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4264 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4265 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4266 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4267 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4268 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4269 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4270 /* create random vec */ 4271 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4272 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4273 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4274 if (CoarseNullSpace) { 4275 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4276 } 4277 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4278 /* solve coarse problem */ 4279 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4280 if (CoarseNullSpace) { 4281 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4282 } 4283 /* set eigenvalue estimation if preonly has not been requested */ 4284 if (compute_eigs) { 4285 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4286 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4287 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4288 lambda_max = eigs_r[neigs-1]; 4289 lambda_min = eigs_r[0]; 4290 if (pcbddc->use_coarse_estimates) { 4291 if (lambda_max>lambda_min) { 4292 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4293 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4294 } 4295 } 4296 } 4297 4298 /* check coarse problem residual error */ 4299 if (pcbddc->dbg_flag) { 4300 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4301 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4302 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4303 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4304 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4305 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4306 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4307 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4308 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4309 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4310 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4311 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4312 if (compute_eigs) { 4313 PetscReal lambda_max_s,lambda_min_s; 4314 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4315 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4316 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4317 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4318 for (i=0;i<neigs;i++) { 4319 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4320 } 4321 } 4322 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4323 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4324 } 4325 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4326 if (compute_eigs) { 4327 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4328 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4329 } 4330 } 4331 } 4332 /* print additional info */ 4333 if (pcbddc->dbg_flag) { 4334 /* waits until all processes reaches this point */ 4335 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4336 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4337 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4338 } 4339 4340 /* free memory */ 4341 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4342 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4343 PetscFunctionReturn(0); 4344 } 4345 4346 #undef __FUNCT__ 4347 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4348 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4349 { 4350 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4351 PC_IS* pcis = (PC_IS*)pc->data; 4352 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4353 PetscInt i,coarse_size=0; 4354 PetscInt *local_primal_indices=NULL; 4355 PetscErrorCode ierr; 4356 4357 PetscFunctionBegin; 4358 /* Compute global number of coarse dofs */ 4359 if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) { 4360 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created"); 4361 } 4362 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 4363 4364 /* check numbering */ 4365 if (pcbddc->dbg_flag) { 4366 PetscScalar coarsesum,*array; 4367 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4368 4369 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4370 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4371 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4372 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4373 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4374 for (i=0;i<pcbddc->local_primal_size;i++) { 4375 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4376 } 4377 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4378 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4379 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4380 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4381 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4382 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4383 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4384 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4385 for (i=0;i<pcis->n;i++) { 4386 if (array[i] == 1.0) { 4387 set_error = PETSC_TRUE; 4388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4389 } 4390 } 4391 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4392 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4393 for (i=0;i<pcis->n;i++) { 4394 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4395 } 4396 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4397 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4398 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4399 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4400 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4401 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4402 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4403 PetscInt *gidxs; 4404 4405 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4406 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4409 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4410 for (i=0;i<pcbddc->local_primal_size;i++) { 4411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4412 } 4413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4414 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4415 } 4416 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4417 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4418 } 4419 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4420 /* get back data */ 4421 *coarse_size_n = coarse_size; 4422 *local_primal_indices_n = local_primal_indices; 4423 PetscFunctionReturn(0); 4424 } 4425 4426 #undef __FUNCT__ 4427 #define __FUNCT__ "PCBDDCGlobalToLocal" 4428 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4429 { 4430 IS localis_t; 4431 PetscInt i,lsize,*idxs,n; 4432 PetscScalar *vals; 4433 PetscErrorCode ierr; 4434 4435 PetscFunctionBegin; 4436 /* get indices in local ordering exploiting local to global map */ 4437 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4438 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4439 for (i=0;i<lsize;i++) vals[i] = 1.0; 4440 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4441 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4442 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4443 if (idxs) { /* multilevel guard */ 4444 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4445 } 4446 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4447 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4448 ierr = PetscFree(vals);CHKERRQ(ierr); 4449 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4450 /* now compute set in local ordering */ 4451 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4453 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4454 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4455 for (i=0,lsize=0;i<n;i++) { 4456 if (PetscRealPart(vals[i]) > 0.5) { 4457 lsize++; 4458 } 4459 } 4460 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4461 for (i=0,lsize=0;i<n;i++) { 4462 if (PetscRealPart(vals[i]) > 0.5) { 4463 idxs[lsize++] = i; 4464 } 4465 } 4466 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4467 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4468 *localis = localis_t; 4469 PetscFunctionReturn(0); 4470 } 4471 4472 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4473 #undef __FUNCT__ 4474 #define __FUNCT__ "PCBDDCMatMult_Private" 4475 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4476 { 4477 PCBDDCChange_ctx change_ctx; 4478 PetscErrorCode ierr; 4479 4480 PetscFunctionBegin; 4481 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4482 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4483 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4484 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 4488 #undef __FUNCT__ 4489 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4490 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4491 { 4492 PCBDDCChange_ctx change_ctx; 4493 PetscErrorCode ierr; 4494 4495 PetscFunctionBegin; 4496 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4497 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4498 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4499 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4500 PetscFunctionReturn(0); 4501 } 4502 4503 #undef __FUNCT__ 4504 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4505 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4506 { 4507 PC_IS *pcis=(PC_IS*)pc->data; 4508 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4509 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4510 Mat S_j; 4511 PetscInt *used_xadj,*used_adjncy; 4512 PetscBool free_used_adj; 4513 PetscErrorCode ierr; 4514 4515 PetscFunctionBegin; 4516 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4517 free_used_adj = PETSC_FALSE; 4518 if (pcbddc->sub_schurs_layers == -1) { 4519 used_xadj = NULL; 4520 used_adjncy = NULL; 4521 } else { 4522 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4523 used_xadj = pcbddc->mat_graph->xadj; 4524 used_adjncy = pcbddc->mat_graph->adjncy; 4525 } else if (pcbddc->computed_rowadj) { 4526 used_xadj = pcbddc->mat_graph->xadj; 4527 used_adjncy = pcbddc->mat_graph->adjncy; 4528 } else { 4529 PetscBool flg_row=PETSC_FALSE; 4530 const PetscInt *xadj,*adjncy; 4531 PetscInt nvtxs; 4532 4533 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4534 if (flg_row) { 4535 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4536 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4537 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4538 free_used_adj = PETSC_TRUE; 4539 } else { 4540 pcbddc->sub_schurs_layers = -1; 4541 used_xadj = NULL; 4542 used_adjncy = NULL; 4543 } 4544 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4545 } 4546 } 4547 4548 /* setup sub_schurs data */ 4549 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4550 if (!sub_schurs->use_mumps) { 4551 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4552 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4553 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4554 } else { 4555 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4556 } 4557 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4558 4559 /* free adjacency */ 4560 if (free_used_adj) { 4561 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4562 } 4563 PetscFunctionReturn(0); 4564 } 4565 4566 #undef __FUNCT__ 4567 #define __FUNCT__ "PCBDDCInitSubSchurs" 4568 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4569 { 4570 PC_IS *pcis=(PC_IS*)pc->data; 4571 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4572 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4573 PCBDDCGraph graph; 4574 PetscErrorCode ierr; 4575 4576 PetscFunctionBegin; 4577 /* attach interface graph for determining subsets */ 4578 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4579 IS verticesIS; 4580 4581 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4582 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4583 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4584 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4585 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4586 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4587 /* 4588 if (pcbddc->dbg_flag) { 4589 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4590 } 4591 */ 4592 } else { 4593 graph = pcbddc->mat_graph; 4594 } 4595 4596 /* sub_schurs init */ 4597 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4598 4599 /* free graph struct */ 4600 if (pcbddc->sub_schurs_rebuild) { 4601 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4602 } 4603 PetscFunctionReturn(0); 4604 } 4605