1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 if (pcbddc->local_auxmat2) { 434 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 435 ierr = PetscFree(array);CHKERRQ(ierr); 436 } 437 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 438 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 439 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 440 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 441 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 442 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 443 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 444 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 445 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 446 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 447 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 448 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 449 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 450 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 452 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 453 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 459 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 460 { 461 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 462 PC_IS *pcis = (PC_IS*)pc->data; 463 VecType impVecType; 464 PetscInt n_constraints,n_R,old_size; 465 PetscErrorCode ierr; 466 467 PetscFunctionBegin; 468 if (!pcbddc->ConstraintMatrix) { 469 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 470 } 471 /* get sizes */ 472 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 473 n_R = pcis->n-pcbddc->n_vertices; 474 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 475 /* local work vectors (try to avoid unneeded work)*/ 476 /* R nodes */ 477 old_size = -1; 478 if (pcbddc->vec1_R) { 479 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 480 } 481 if (n_R != old_size) { 482 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 483 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 484 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 485 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 486 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 487 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 488 } 489 /* local primal dofs */ 490 old_size = -1; 491 if (pcbddc->vec1_P) { 492 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 493 } 494 if (pcbddc->local_primal_size != old_size) { 495 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 496 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 497 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 498 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 499 } 500 /* local explicit constraints */ 501 old_size = -1; 502 if (pcbddc->vec1_C) { 503 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 504 } 505 if (n_constraints && n_constraints != old_size) { 506 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 507 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 508 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 509 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 510 } 511 PetscFunctionReturn(0); 512 } 513 514 #undef __FUNCT__ 515 #define __FUNCT__ "PCBDDCSetUpCorrection" 516 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 517 { 518 PetscErrorCode ierr; 519 /* pointers to pcis and pcbddc */ 520 PC_IS* pcis = (PC_IS*)pc->data; 521 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 522 /* submatrices of local problem */ 523 Mat A_RV,A_VR,A_VV; 524 /* submatrices of local coarse problem */ 525 Mat S_VV,S_CV,S_VC,S_CC; 526 /* working matrices */ 527 Mat C_CR; 528 /* additional working stuff */ 529 PC pc_R; 530 Mat F; 531 PetscBool isLU,isCHOL,isILU; 532 533 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 534 PetscScalar *work; 535 PetscInt *idx_V_B; 536 PetscInt n,n_vertices,n_constraints; 537 PetscInt i,n_R,n_D,n_B; 538 PetscBool unsymmetric_check; 539 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 540 MatType impMatType; 541 /* some shortcuts to scalars */ 542 PetscScalar one=1.0,m_one=-1.0; 543 544 PetscFunctionBegin; 545 n_vertices = pcbddc->n_vertices; 546 n_constraints = pcbddc->local_primal_size-n_vertices; 547 /* Set Non-overlapping dimensions */ 548 n_B = pcis->n_B; 549 n_D = pcis->n - n_B; 550 n_R = pcis->n - n_vertices; 551 552 /* Set types for local objects needed by BDDC precondtioner */ 553 impMatType = MATSEQDENSE; 554 555 /* vertices in boundary numbering */ 556 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 557 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 558 if (i != n_vertices) { 559 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 560 } 561 562 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 563 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 565 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 567 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 568 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 569 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 570 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 571 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 572 573 unsymmetric_check = PETSC_FALSE; 574 /* allocate workspace */ 575 n = 0; 576 if (n_constraints) { 577 n += n_R*n_constraints; 578 } 579 if (n_vertices) { 580 n = PetscMax(2*n_R*n_vertices,n); 581 } 582 if (!pcbddc->issym) { 583 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 584 unsymmetric_check = PETSC_TRUE; 585 } 586 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 587 588 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 589 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 590 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 591 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 592 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 593 if (isLU || isILU || isCHOL) { 594 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 595 } else { 596 F = NULL; 597 } 598 599 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 600 if (n_constraints) { 601 Mat M1,M2,M3; 602 IS is_aux; 603 /* see if we can save some allocations */ 604 if (pcbddc->local_auxmat2) { 605 PetscInt on_R,on_constraints; 606 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 607 if (on_R != n_R || on_constraints != n_constraints) { 608 PetscScalar *marray; 609 610 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr); 611 ierr = PetscFree(marray);CHKERRQ(ierr); 612 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 613 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 614 } 615 } 616 /* auxiliary matrices */ 617 if (!pcbddc->local_auxmat2) { 618 PetscScalar *marray; 619 620 ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr); 621 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr); 622 marray += n_R*n_constraints; 623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr); 624 } 625 626 /* Extract constraints on R nodes: C_{CR} */ 627 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 628 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 629 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 630 631 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 632 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 633 for (i=0;i<n_constraints;i++) { 634 const PetscScalar *row_cmat_values; 635 const PetscInt *row_cmat_indices; 636 PetscInt size_of_constraint,j; 637 638 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 639 for (j=0;j<size_of_constraint;j++) { 640 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 641 } 642 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 643 } 644 if (F) { 645 Mat B; 646 647 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 648 ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr); 649 ierr = MatDestroy(&B);CHKERRQ(ierr); 650 } else { 651 PetscScalar *xarray; 652 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 653 for (i=0;i<n_constraints;i++) { 654 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 655 ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr); 656 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 657 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 658 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 659 } 660 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr); 661 } 662 663 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 664 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 665 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 666 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 667 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 668 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 669 ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr); 670 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 671 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 672 ierr = MatDestroy(&M2);CHKERRQ(ierr); 673 ierr = MatDestroy(&M3);CHKERRQ(ierr); 674 /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */ 675 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 676 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 677 ierr = MatDestroy(&M1);CHKERRQ(ierr); 678 } 679 /* Get submatrices from subdomain matrix */ 680 if (n_vertices) { 681 Mat newmat; 682 IS is_aux; 683 PetscInt ibs,mbs; 684 PetscBool issbaij; 685 686 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 687 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 688 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 689 if (ibs != mbs) { /* need to convert to SEQAIJ */ 690 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 691 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 692 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 693 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 694 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 695 } else { 696 /* this is safe */ 697 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 698 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 699 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 700 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 701 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 702 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 703 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 704 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 705 } else { 706 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 707 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 708 } 709 } 710 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 711 } 712 713 /* Matrix of coarse basis functions (local) */ 714 if (pcbddc->coarse_phi_B) { 715 PetscInt on_B,on_primal,on_D=n_D; 716 if (pcbddc->coarse_phi_D) { 717 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 718 } 719 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 720 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 721 PetscScalar *marray; 722 723 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 724 ierr = PetscFree(marray);CHKERRQ(ierr); 725 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 726 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 727 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 729 } 730 } 731 732 if (!pcbddc->coarse_phi_B) { 733 PetscScalar *marray; 734 735 n = n_B*pcbddc->local_primal_size; 736 if (pcbddc->switch_static || pcbddc->dbg_flag) { 737 n += n_D*pcbddc->local_primal_size; 738 } 739 if (!pcbddc->issym) { 740 n *= 2; 741 } 742 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 744 n = n_B*pcbddc->local_primal_size; 745 if (pcbddc->switch_static || pcbddc->dbg_flag) { 746 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 747 n += n_D*pcbddc->local_primal_size; 748 } 749 if (!pcbddc->issym) { 750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 751 if (pcbddc->switch_static || pcbddc->dbg_flag) { 752 n = n_B*pcbddc->local_primal_size; 753 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 754 } 755 } else { 756 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 757 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 758 if (pcbddc->switch_static || pcbddc->dbg_flag) { 759 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 760 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 761 } 762 } 763 } 764 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 765 /* vertices */ 766 if (n_vertices) { 767 768 if (n_R) { 769 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 770 PetscBLASInt B_N,B_one = 1; 771 PetscScalar *x,*y; 772 773 ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 774 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 775 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 776 if (F) { 777 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 778 } else { 779 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 780 for (i=0;i<n_vertices;i++) { 781 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 782 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 783 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 784 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 785 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 786 } 787 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 788 } 789 ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr); 790 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 791 if (n_constraints) { 792 Mat B; 793 ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 794 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 795 ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 796 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 797 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 798 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 799 ierr = MatDestroy(&B);CHKERRQ(ierr); 800 } 801 ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 802 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 803 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 804 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 805 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 806 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 807 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 808 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 809 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 810 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 811 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 812 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 813 } else { 814 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 815 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 816 } 817 /* coarse basis functions */ 818 for (i=0;i<n_vertices;i++) { 819 PetscScalar *y; 820 821 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 822 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 823 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 824 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 825 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 y[n_B*i+idx_V_B[i]] = 1.0; 827 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 828 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 829 830 if (pcbddc->switch_static || pcbddc->dbg_flag) { 831 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 832 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 833 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 836 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 837 } 838 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 839 } 840 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 841 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 842 } 843 844 if (n_constraints) { 845 Mat B; 846 847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 848 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 849 ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 850 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 851 if (n_vertices) { 852 ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr); 853 } 854 ierr = MatDestroy(&B);CHKERRQ(ierr); 855 /* coarse basis functions */ 856 for (i=0;i<n_constraints;i++) { 857 PetscScalar *y; 858 859 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 860 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 861 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 862 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 863 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 865 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 866 867 if (pcbddc->switch_static || pcbddc->dbg_flag) { 868 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 869 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 870 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 871 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 873 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 874 } 875 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 876 } 877 } 878 879 /* compute other basis functions for non-symmetric problems */ 880 if (!pcbddc->issym) { 881 Mat B,X; 882 883 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr); 884 885 if (n_constraints) { 886 Mat S_CCT,B_C; 887 888 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 889 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 890 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 891 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 892 if (n_vertices) { 893 Mat B_V,S_VCT; 894 895 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 896 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 897 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 898 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 899 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 900 } 901 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 902 } 903 if (n_vertices && n_R) { 904 Mat A_VRT; 905 PetscBLASInt B_N,B_one = 1; 906 907 if (!n_constraints) { /* if there are no constraints, reset work */ 908 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 909 } 910 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr); 911 ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 912 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 913 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one)); 914 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 915 } 916 917 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr); 918 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 919 for (i=0;i<pcbddc->local_primal_size;i++) { 920 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 921 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 922 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 923 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 924 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 925 } 926 } else { 927 for (i=0;i<pcbddc->local_primal_size;i++) { 928 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 929 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 930 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 931 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 932 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 933 } 934 } 935 ierr = MatDestroy(&B);CHKERRQ(ierr); 936 /* coarse basis functions */ 937 for (i=0;i<pcbddc->local_primal_size;i++) { 938 PetscScalar *y; 939 940 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 941 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 942 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 943 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 944 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 if (i<n_vertices) { 946 y[n_B*i+idx_V_B[i]] = 1.0; 947 } 948 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 949 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 950 951 if (pcbddc->switch_static || pcbddc->dbg_flag) { 952 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 953 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 954 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 957 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 958 } 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 } 961 ierr = MatDestroy(&X);CHKERRQ(ierr); 962 } 963 964 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 965 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 966 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 967 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 968 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 969 /* Checking coarse_sub_mat and coarse basis functios */ 970 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 971 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 972 if (pcbddc->dbg_flag) { 973 Mat coarse_sub_mat; 974 Mat AUXMAT,TM1,TM2,TM3,TM4; 975 Mat coarse_phi_D,coarse_phi_B; 976 Mat coarse_psi_D,coarse_psi_B; 977 Mat A_II,A_BB,A_IB,A_BI; 978 Mat C_B,CPHI; 979 IS is_dummy; 980 Vec mones; 981 MatType checkmattype=MATSEQAIJ; 982 PetscReal real_value; 983 984 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 985 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 986 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 987 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 988 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 989 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 990 if (unsymmetric_check) { 991 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 992 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 993 } 994 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 995 996 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 997 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 998 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 999 if (unsymmetric_check) { 1000 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1001 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1002 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1003 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1004 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1005 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1006 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1007 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1008 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1009 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1010 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1011 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1012 } else { 1013 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1014 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1015 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1016 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1017 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1018 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1019 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1020 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1021 } 1022 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1023 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1024 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1025 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1026 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1027 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1028 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1029 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1030 1031 /* check constraints */ 1032 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1033 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1034 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1035 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1036 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1037 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1038 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1039 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1040 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1041 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1042 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1043 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1044 1045 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1046 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1047 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1048 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1049 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1050 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1051 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1052 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1053 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1054 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1055 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1056 if (unsymmetric_check) { 1057 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1058 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1059 } 1060 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1061 } 1062 1063 /* free memory */ 1064 ierr = PetscFree(work);CHKERRQ(ierr); 1065 if (n_vertices) { 1066 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1067 } 1068 if (n_constraints) { 1069 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1070 } 1071 /* get back data */ 1072 *coarse_submat_vals_n = coarse_submat_vals; 1073 PetscFunctionReturn(0); 1074 } 1075 1076 #undef __FUNCT__ 1077 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1078 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1079 { 1080 Mat *work_mat; 1081 IS isrow_s,iscol_s; 1082 PetscBool rsorted,csorted; 1083 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1088 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1089 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1090 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1091 1092 if (!rsorted) { 1093 const PetscInt *idxs; 1094 PetscInt *idxs_sorted,i; 1095 1096 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1097 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1098 for (i=0;i<rsize;i++) { 1099 idxs_perm_r[i] = i; 1100 } 1101 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1102 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1103 for (i=0;i<rsize;i++) { 1104 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1105 } 1106 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1108 } else { 1109 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1110 isrow_s = isrow; 1111 } 1112 1113 if (!csorted) { 1114 if (isrow == iscol) { 1115 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1116 iscol_s = isrow_s; 1117 } else { 1118 const PetscInt *idxs; 1119 PetscInt *idxs_sorted,i; 1120 1121 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1122 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1123 for (i=0;i<csize;i++) { 1124 idxs_perm_c[i] = i; 1125 } 1126 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1127 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1128 for (i=0;i<csize;i++) { 1129 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1130 } 1131 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1133 } 1134 } else { 1135 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1136 iscol_s = iscol; 1137 } 1138 1139 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1140 1141 if (!rsorted || !csorted) { 1142 Mat new_mat; 1143 IS is_perm_r,is_perm_c; 1144 1145 if (!rsorted) { 1146 PetscInt *idxs_r,i; 1147 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1148 for (i=0;i<rsize;i++) { 1149 idxs_r[idxs_perm_r[i]] = i; 1150 } 1151 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1152 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1153 } else { 1154 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1155 } 1156 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1157 1158 if (!csorted) { 1159 if (isrow_s == iscol_s) { 1160 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1161 is_perm_c = is_perm_r; 1162 } else { 1163 PetscInt *idxs_c,i; 1164 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1165 for (i=0;i<csize;i++) { 1166 idxs_c[idxs_perm_c[i]] = i; 1167 } 1168 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1169 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1170 } 1171 } else { 1172 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1173 } 1174 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1175 1176 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1177 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1178 work_mat[0] = new_mat; 1179 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1180 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1181 } 1182 1183 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1184 *B = work_mat[0]; 1185 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1186 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1187 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1193 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1194 { 1195 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1196 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1197 Mat new_mat; 1198 IS is_local,is_global; 1199 PetscInt local_size; 1200 PetscBool isseqaij; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1205 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1206 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1207 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1208 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1209 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1210 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1211 1212 /* check */ 1213 if (pcbddc->dbg_flag) { 1214 Vec x,x_change; 1215 PetscReal error; 1216 1217 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1218 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1219 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1220 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1221 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1222 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1223 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1226 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1227 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1228 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1229 ierr = VecDestroy(&x);CHKERRQ(ierr); 1230 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1231 } 1232 1233 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1234 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1235 if (isseqaij) { 1236 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1237 } else { 1238 Mat work_mat; 1239 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1240 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1241 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1242 } 1243 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1244 #if !defined(PETSC_USE_COMPLEX) 1245 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1246 #endif 1247 /* 1248 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1249 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1250 */ 1251 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1257 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1258 { 1259 PC_IS* pcis = (PC_IS*)(pc->data); 1260 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1261 IS is_aux1,is_aux2; 1262 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1263 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1264 PetscInt vbs,bs; 1265 PetscBT bitmask; 1266 PetscErrorCode ierr; 1267 1268 PetscFunctionBegin; 1269 /* 1270 No need to setup local scatters if 1271 - primal space is unchanged 1272 AND 1273 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1274 AND 1275 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1276 */ 1277 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1278 PetscFunctionReturn(0); 1279 } 1280 /* destroy old objects */ 1281 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1282 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1283 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1284 /* Set Non-overlapping dimensions */ 1285 n_B = pcis->n_B; 1286 n_D = pcis->n - n_B; 1287 n_vertices = pcbddc->n_vertices; 1288 /* create auxiliary bitmask */ 1289 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1290 for (i=0;i<n_vertices;i++) { 1291 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1292 } 1293 1294 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1295 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1296 for (i=0, n_R=0; i<pcis->n; i++) { 1297 if (!PetscBTLookup(bitmask,i)) { 1298 idx_R_local[n_R] = i; 1299 n_R++; 1300 } 1301 } 1302 1303 /* Block code */ 1304 vbs = 1; 1305 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1306 if (bs>1 && !(n_vertices%bs)) { 1307 PetscBool is_blocked = PETSC_TRUE; 1308 PetscInt *vary; 1309 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1310 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1311 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1312 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1313 for (i=0; i<n_vertices/bs; i++) { 1314 if (vary[i]!=0 && vary[i]!=bs) { 1315 is_blocked = PETSC_FALSE; 1316 break; 1317 } 1318 } 1319 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1320 vbs = bs; 1321 for (i=0;i<n_R/vbs;i++) { 1322 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1323 } 1324 } 1325 ierr = PetscFree(vary);CHKERRQ(ierr); 1326 } 1327 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1328 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1329 1330 /* print some info if requested */ 1331 if (pcbddc->dbg_flag) { 1332 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1333 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1338 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1339 } 1340 1341 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1342 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1343 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1344 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1345 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1346 for (i=0; i<n_D; i++) { 1347 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1348 } 1349 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1350 for (i=0, j=0; i<n_R; i++) { 1351 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1352 aux_array1[j++] = i; 1353 } 1354 } 1355 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1356 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1357 for (i=0, j=0; i<n_B; i++) { 1358 if (!PetscBTLookup(bitmask,is_indices[i])) { 1359 aux_array2[j++] = i; 1360 } 1361 } 1362 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1363 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1364 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1365 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1366 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1367 1368 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1369 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1370 for (i=0, j=0; i<n_R; i++) { 1371 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1372 aux_array1[j++] = i; 1373 } 1374 } 1375 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1376 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1377 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1378 } 1379 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1380 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1387 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1388 { 1389 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1390 PC_IS *pcis = (PC_IS*)pc->data; 1391 PC pc_temp; 1392 Mat A_RR; 1393 MatReuse reuse; 1394 PetscScalar m_one = -1.0; 1395 PetscReal value; 1396 PetscInt n_D,n_R,ibs,mbs; 1397 PetscBool use_exact,use_exact_reduced,issbaij; 1398 PetscErrorCode ierr; 1399 /* prefixes stuff */ 1400 char dir_prefix[256],neu_prefix[256],str_level[16]; 1401 size_t len; 1402 1403 PetscFunctionBegin; 1404 1405 /* compute prefixes */ 1406 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1407 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1408 if (!pcbddc->current_level) { 1409 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1410 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1411 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1412 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1413 } else { 1414 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1415 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1416 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1417 len -= 15; /* remove "pc_bddc_coarse_" */ 1418 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1419 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1420 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1421 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1422 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1423 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1424 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1425 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1426 } 1427 1428 /* DIRICHLET PROBLEM */ 1429 if (dirichlet) { 1430 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1431 if (pcbddc->issym) { 1432 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1433 } 1434 /* Matrix for Dirichlet problem is pcis->A_II */ 1435 n_D = pcis->n - pcis->n_B; 1436 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1437 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1438 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1439 /* default */ 1440 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1441 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1442 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1443 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1444 if (issbaij) { 1445 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1446 } else { 1447 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1448 } 1449 /* Allow user's customization */ 1450 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1451 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1452 } 1453 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1454 if (sub_schurs->interior_solver) { 1455 ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr); 1456 } 1457 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1458 if (!n_D) { 1459 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1460 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1461 } 1462 /* Set Up KSP for Dirichlet problem of BDDC */ 1463 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1464 /* set ksp_D into pcis data */ 1465 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1466 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1467 pcis->ksp_D = pcbddc->ksp_D; 1468 } 1469 1470 /* NEUMANN PROBLEM */ 1471 A_RR = 0; 1472 if (neumann) { 1473 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1474 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1475 if (pcbddc->ksp_R) { /* already created ksp */ 1476 PetscInt nn_R; 1477 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1478 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1479 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1480 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1481 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1482 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1483 reuse = MAT_INITIAL_MATRIX; 1484 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1485 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1486 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1487 reuse = MAT_INITIAL_MATRIX; 1488 } else { /* safe to reuse the matrix */ 1489 reuse = MAT_REUSE_MATRIX; 1490 } 1491 } 1492 /* last check */ 1493 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1494 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1495 reuse = MAT_INITIAL_MATRIX; 1496 } 1497 } else { /* first time, so we need to create the matrix */ 1498 reuse = MAT_INITIAL_MATRIX; 1499 } 1500 /* extract A_RR */ 1501 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1502 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1503 if (ibs != mbs) { 1504 Mat newmat; 1505 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1506 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1507 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1508 } else { 1509 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1510 } 1511 if (pcbddc->issym) { 1512 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1513 } 1514 if (!pcbddc->ksp_R) { /* create object if not present */ 1515 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1516 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1517 /* default */ 1518 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1519 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1520 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1521 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1522 if (issbaij) { 1523 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1524 } else { 1525 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1526 } 1527 /* Allow user's customization */ 1528 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1529 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1530 } 1531 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1532 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1533 if (!n_R) { 1534 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1535 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1536 } 1537 /* Set Up KSP for Neumann problem of BDDC */ 1538 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1539 } 1540 1541 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1542 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1543 if (pcbddc->dbg_flag) { 1544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1545 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1546 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1547 } 1548 if (dirichlet) { /* Dirichlet */ 1549 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1550 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1551 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1552 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1553 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1554 /* need to be adapted? */ 1555 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1556 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1557 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1558 /* print info */ 1559 if (pcbddc->dbg_flag) { 1560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1562 } 1563 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1564 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1565 } 1566 } 1567 if (neumann) { /* Neumann */ 1568 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1569 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1570 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1571 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1572 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1573 /* need to be adapted? */ 1574 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1575 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1576 /* print info */ 1577 if (pcbddc->dbg_flag) { 1578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1580 } 1581 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1582 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1583 } 1584 } 1585 } 1586 /* free Neumann problem's matrix */ 1587 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1593 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose) 1594 { 1595 PetscErrorCode ierr; 1596 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1597 1598 PetscFunctionBegin; 1599 if (applytranspose) { 1600 if (pcbddc->local_auxmat1) { 1601 ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr); 1602 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr); 1603 } 1604 ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1605 } else { 1606 ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr); 1607 if (pcbddc->local_auxmat1) { 1608 ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr); 1609 ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr); 1610 } 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1618 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1619 { 1620 PetscErrorCode ierr; 1621 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1622 PC_IS* pcis = (PC_IS*) (pc->data); 1623 const PetscScalar zero = 0.0; 1624 1625 PetscFunctionBegin; 1626 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1627 if (applytranspose) { 1628 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1629 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1630 } else { 1631 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1632 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1633 } 1634 /* start communications from local primal nodes to rhs of coarse solver */ 1635 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1636 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1637 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 1639 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1640 /* TODO remove null space when doing multilevel */ 1641 if (pcbddc->coarse_ksp) { 1642 Vec rhs,sol; 1643 1644 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1645 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1646 if (applytranspose) { 1647 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1648 } else { 1649 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1650 } 1651 } 1652 1653 /* Local solution on R nodes */ 1654 if (pcis->n) { 1655 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1656 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1658 if (pcbddc->switch_static) { 1659 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1661 } 1662 ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr); 1663 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1664 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 if (pcbddc->switch_static) { 1667 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 } 1670 } 1671 1672 /* communications from coarse sol to local primal nodes */ 1673 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1674 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1675 1676 /* Sum contributions from two levels */ 1677 if (applytranspose) { 1678 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1679 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1680 } else { 1681 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1682 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1689 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1690 { 1691 PetscErrorCode ierr; 1692 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1693 PetscScalar *array; 1694 Vec from,to; 1695 1696 PetscFunctionBegin; 1697 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1698 from = pcbddc->coarse_vec; 1699 to = pcbddc->vec1_P; 1700 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1701 Vec tvec; 1702 1703 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1704 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1705 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1706 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1707 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1708 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1709 } 1710 } else { /* from local to global -> put data in coarse right hand side */ 1711 from = pcbddc->vec1_P; 1712 to = pcbddc->coarse_vec; 1713 } 1714 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1720 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1721 { 1722 PetscErrorCode ierr; 1723 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1724 PetscScalar *array; 1725 Vec from,to; 1726 1727 PetscFunctionBegin; 1728 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1729 from = pcbddc->coarse_vec; 1730 to = pcbddc->vec1_P; 1731 } else { /* from local to global -> put data in coarse right hand side */ 1732 from = pcbddc->vec1_P; 1733 to = pcbddc->coarse_vec; 1734 } 1735 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1736 if (smode == SCATTER_FORWARD) { 1737 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1738 Vec tvec; 1739 1740 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1741 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1742 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1743 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1744 } 1745 } else { 1746 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1747 ierr = VecResetArray(from);CHKERRQ(ierr); 1748 } 1749 } 1750 PetscFunctionReturn(0); 1751 } 1752 1753 /* uncomment for testing purposes */ 1754 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1757 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1758 { 1759 PetscErrorCode ierr; 1760 PC_IS* pcis = (PC_IS*)(pc->data); 1761 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1762 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1763 /* one and zero */ 1764 PetscScalar one=1.0,zero=0.0; 1765 /* space to store constraints and their local indices */ 1766 PetscScalar *constraints_data; 1767 PetscInt *constraints_idxs,*constraints_idxs_B; 1768 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1769 PetscInt *constraints_n; 1770 /* iterators */ 1771 PetscInt i,j,k,total_counts,total_counts_cc; 1772 /* BLAS integers */ 1773 PetscBLASInt lwork,lierr; 1774 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1775 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1776 /* reuse */ 1777 PetscInt olocal_primal_size,olocal_primal_size_cc; 1778 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1779 /* change of basis */ 1780 PetscBool boolforchange,qr_needed; 1781 PetscBT change_basis,qr_needed_idx; 1782 /* auxiliary stuff */ 1783 PetscInt *nnz,*is_indices; 1784 PetscInt ncc; 1785 /* some quantities */ 1786 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1787 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1788 1789 PetscFunctionBegin; 1790 /* Destroy Mat objects computed previously */ 1791 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1792 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1793 /* save info on constraints from previous setup (if any) */ 1794 olocal_primal_size = pcbddc->local_primal_size; 1795 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1796 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1797 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1798 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1799 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1800 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1801 1802 /* print some info */ 1803 if (pcbddc->dbg_flag) { 1804 IS vertices; 1805 PetscInt nv,nedges,nfaces; 1806 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1807 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1808 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1809 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1810 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1811 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1813 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1814 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1815 } 1816 1817 if (!pcbddc->adaptive_selection) { 1818 IS ISForVertices,*ISForFaces,*ISForEdges; 1819 MatNullSpace nearnullsp; 1820 const Vec *nearnullvecs; 1821 Vec *localnearnullsp; 1822 PetscScalar *array; 1823 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1824 PetscBool nnsp_has_cnst; 1825 /* LAPACK working arrays for SVD or POD */ 1826 PetscBool skip_lapack; 1827 PetscScalar *work; 1828 PetscReal *singular_vals; 1829 #if defined(PETSC_USE_COMPLEX) 1830 PetscReal *rwork; 1831 #endif 1832 #if defined(PETSC_MISSING_LAPACK_GESVD) 1833 PetscScalar *temp_basis,*correlation_mat; 1834 #else 1835 PetscBLASInt dummy_int=1; 1836 PetscScalar dummy_scalar=1.; 1837 #endif 1838 1839 /* Get index sets for faces, edges and vertices from graph */ 1840 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1841 /* free unneeded index sets */ 1842 if (!pcbddc->use_vertices) { 1843 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1844 } 1845 if (!pcbddc->use_edges) { 1846 for (i=0;i<n_ISForEdges;i++) { 1847 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1848 } 1849 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1850 n_ISForEdges = 0; 1851 } 1852 if (!pcbddc->use_faces) { 1853 for (i=0;i<n_ISForFaces;i++) { 1854 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1855 } 1856 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1857 n_ISForFaces = 0; 1858 } 1859 1860 #if defined(PETSC_USE_DEBUG) 1861 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1862 Also use_change_of_basis should be consistent among processors */ 1863 if (pcbddc->NullSpace) { 1864 PetscBool tbool[2],gbool[2]; 1865 1866 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 1867 pcbddc->use_change_of_basis = PETSC_TRUE; 1868 if (!ISForEdges) { 1869 pcbddc->use_change_on_faces = PETSC_TRUE; 1870 } 1871 } 1872 tbool[0] = pcbddc->use_change_of_basis; 1873 tbool[1] = pcbddc->use_change_on_faces; 1874 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1875 pcbddc->use_change_of_basis = gbool[0]; 1876 pcbddc->use_change_on_faces = gbool[1]; 1877 } 1878 #endif 1879 1880 /* check if near null space is attached to global mat */ 1881 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1882 if (nearnullsp) { 1883 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1884 /* remove any stored info */ 1885 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1886 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1887 /* store information for BDDC solver reuse */ 1888 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1889 pcbddc->onearnullspace = nearnullsp; 1890 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1891 for (i=0;i<nnsp_size;i++) { 1892 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1893 } 1894 } else { /* if near null space is not provided BDDC uses constants by default */ 1895 nnsp_size = 0; 1896 nnsp_has_cnst = PETSC_TRUE; 1897 } 1898 /* get max number of constraints on a single cc */ 1899 max_constraints = nnsp_size; 1900 if (nnsp_has_cnst) max_constraints++; 1901 1902 /* 1903 Evaluate maximum storage size needed by the procedure 1904 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 1905 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 1906 There can be multiple constraints per connected component 1907 */ 1908 n_vertices = 0; 1909 if (ISForVertices) { 1910 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1911 } 1912 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 1913 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 1914 1915 total_counts = n_ISForFaces+n_ISForEdges; 1916 total_counts *= max_constraints; 1917 total_counts += n_vertices; 1918 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1919 1920 total_counts = 0; 1921 max_size_of_constraint = 0; 1922 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1923 IS used_is; 1924 if (i<n_ISForEdges) { 1925 used_is = ISForEdges[i]; 1926 } else { 1927 used_is = ISForFaces[i-n_ISForEdges]; 1928 } 1929 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 1930 total_counts += j; 1931 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1932 } 1933 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 1934 1935 /* get local part of global near null space vectors */ 1936 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1937 for (k=0;k<nnsp_size;k++) { 1938 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1939 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1940 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1941 } 1942 1943 /* whether or not to skip lapack calls */ 1944 skip_lapack = PETSC_TRUE; 1945 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1946 1947 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1948 if (!skip_lapack) { 1949 PetscScalar temp_work; 1950 1951 #if defined(PETSC_MISSING_LAPACK_GESVD) 1952 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1953 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 1954 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 1955 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 1956 #if defined(PETSC_USE_COMPLEX) 1957 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 1958 #endif 1959 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1960 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1961 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1962 lwork = -1; 1963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1964 #if !defined(PETSC_USE_COMPLEX) 1965 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1966 #else 1967 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1968 #endif 1969 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1970 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1971 #else /* on missing GESVD */ 1972 /* SVD */ 1973 PetscInt max_n,min_n; 1974 max_n = max_size_of_constraint; 1975 min_n = max_constraints; 1976 if (max_size_of_constraint < max_constraints) { 1977 min_n = max_size_of_constraint; 1978 max_n = max_constraints; 1979 } 1980 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 1981 #if defined(PETSC_USE_COMPLEX) 1982 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 1983 #endif 1984 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1985 lwork = -1; 1986 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1987 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1988 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1989 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1990 #if !defined(PETSC_USE_COMPLEX) 1991 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 1992 #else 1993 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 1994 #endif 1995 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1996 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1997 #endif /* on missing GESVD */ 1998 /* Allocate optimal workspace */ 1999 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2000 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2001 } 2002 /* Now we can loop on constraining sets */ 2003 total_counts = 0; 2004 constraints_idxs_ptr[0] = 0; 2005 constraints_data_ptr[0] = 0; 2006 /* vertices */ 2007 if (n_vertices) { 2008 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2009 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2010 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2011 for (i=0;i<n_vertices;i++) { 2012 constraints_n[total_counts] = 1; 2013 constraints_data[total_counts] = 1.0; 2014 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2015 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2016 total_counts++; 2017 } 2018 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2019 PetscBool used_vertex; 2020 for (i=0;i<n_vertices;i++) { 2021 used_vertex = PETSC_FALSE; 2022 k = 0; 2023 while (!used_vertex && k<nnsp_size) { 2024 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2025 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2026 constraints_n[total_counts] = 1; 2027 constraints_idxs[total_counts] = is_indices[i]; 2028 constraints_data[total_counts] = 1.0; 2029 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2030 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2031 total_counts++; 2032 used_vertex = PETSC_TRUE; 2033 } 2034 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2035 k++; 2036 } 2037 } 2038 } 2039 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2040 n_vertices = total_counts; 2041 } 2042 2043 /* edges and faces */ 2044 total_counts_cc = total_counts; 2045 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2046 IS used_is; 2047 PetscBool idxs_copied = PETSC_FALSE; 2048 2049 if (ncc<n_ISForEdges) { 2050 used_is = ISForEdges[ncc]; 2051 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2052 } else { 2053 used_is = ISForFaces[ncc-n_ISForEdges]; 2054 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2055 } 2056 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2057 2058 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2059 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2060 /* change of basis should not be performed on local periodic nodes */ 2061 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2062 if (nnsp_has_cnst) { 2063 PetscScalar quad_value; 2064 2065 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2066 idxs_copied = PETSC_TRUE; 2067 2068 if (!pcbddc->use_nnsp_true) { 2069 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2070 } else { 2071 quad_value = 1.0; 2072 } 2073 for (j=0;j<size_of_constraint;j++) { 2074 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2075 } 2076 temp_constraints++; 2077 total_counts++; 2078 } 2079 for (k=0;k<nnsp_size;k++) { 2080 PetscReal real_value; 2081 PetscScalar *ptr_to_data; 2082 2083 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2084 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2085 for (j=0;j<size_of_constraint;j++) { 2086 ptr_to_data[j] = array[is_indices[j]]; 2087 } 2088 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2089 /* check if array is null on the connected component */ 2090 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2091 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2092 if (real_value > 0.0) { /* keep indices and values */ 2093 temp_constraints++; 2094 total_counts++; 2095 if (!idxs_copied) { 2096 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2097 idxs_copied = PETSC_TRUE; 2098 } 2099 } 2100 } 2101 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2102 valid_constraints = temp_constraints; 2103 if (!pcbddc->use_nnsp_true && temp_constraints) { 2104 if (temp_constraints == 1) { /* just normalize the constraint */ 2105 PetscScalar norm,*ptr_to_data; 2106 2107 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2109 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2110 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2111 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2112 } else { /* perform SVD */ 2113 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2114 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2115 2116 #if defined(PETSC_MISSING_LAPACK_GESVD) 2117 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2118 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2119 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2120 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2121 from that computed using LAPACKgesvd 2122 -> This is due to a different computation of eigenvectors in LAPACKheev 2123 -> The quality of the POD-computed basis will be the same */ 2124 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2125 /* Store upper triangular part of correlation matrix */ 2126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2127 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2128 for (j=0;j<temp_constraints;j++) { 2129 for (k=0;k<j+1;k++) { 2130 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2131 } 2132 } 2133 /* compute eigenvalues and eigenvectors of correlation matrix */ 2134 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2135 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2136 #if !defined(PETSC_USE_COMPLEX) 2137 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2138 #else 2139 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2140 #endif 2141 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2142 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2143 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2144 j = 0; 2145 while (j < temp_constraints && singular_vals[j] < tol) j++; 2146 total_counts = total_counts-j; 2147 valid_constraints = temp_constraints-j; 2148 /* scale and copy POD basis into used quadrature memory */ 2149 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2150 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2151 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2152 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2153 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2155 if (j<temp_constraints) { 2156 PetscInt ii; 2157 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2158 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2159 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2160 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2161 for (k=0;k<temp_constraints-j;k++) { 2162 for (ii=0;ii<size_of_constraint;ii++) { 2163 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2164 } 2165 } 2166 } 2167 #else /* on missing GESVD */ 2168 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2169 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2170 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2171 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2172 #if !defined(PETSC_USE_COMPLEX) 2173 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2174 #else 2175 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2176 #endif 2177 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2178 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2179 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2180 k = temp_constraints; 2181 if (k > size_of_constraint) k = size_of_constraint; 2182 j = 0; 2183 while (j < k && singular_vals[k-j-1] < tol) j++; 2184 valid_constraints = k-j; 2185 total_counts = total_counts-temp_constraints+valid_constraints; 2186 #endif /* on missing GESVD */ 2187 } 2188 } 2189 /* update pointers information */ 2190 if (valid_constraints) { 2191 constraints_n[total_counts_cc] = valid_constraints; 2192 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2193 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2194 total_counts_cc++; 2195 /* set change_of_basis flag */ 2196 if (boolforchange) { 2197 for (j=valid_constraints;j>0;j--) { 2198 PetscBTSet(change_basis,total_counts-j); 2199 } 2200 } 2201 } 2202 } 2203 /* free workspace */ 2204 if (!skip_lapack) { 2205 ierr = PetscFree(work);CHKERRQ(ierr); 2206 #if defined(PETSC_USE_COMPLEX) 2207 ierr = PetscFree(rwork);CHKERRQ(ierr); 2208 #endif 2209 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2210 #if defined(PETSC_MISSING_LAPACK_GESVD) 2211 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2212 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2213 #endif 2214 } 2215 for (k=0;k<nnsp_size;k++) { 2216 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2217 } 2218 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2219 /* free index sets of faces, edges and vertices */ 2220 for (i=0;i<n_ISForFaces;i++) { 2221 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2222 } 2223 if (n_ISForFaces) { 2224 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2225 } 2226 for (i=0;i<n_ISForEdges;i++) { 2227 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2228 } 2229 if (n_ISForEdges) { 2230 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2231 } 2232 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2233 } else { 2234 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2235 2236 total_counts = 0; 2237 n_vertices = 0; 2238 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2239 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2240 } 2241 max_constraints = 0; 2242 total_counts_cc = 0; 2243 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2244 total_counts += pcbddc->adaptive_constraints_n[i]; 2245 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2246 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2247 } 2248 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2249 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2250 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2251 constraints_data = pcbddc->adaptive_constraints_data; 2252 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2253 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2254 total_counts_cc = 0; 2255 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2256 if (pcbddc->adaptive_constraints_n[i]) { 2257 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2258 } 2259 } 2260 #if 0 2261 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2262 for (i=0;i<total_counts_cc;i++) { 2263 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2264 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2265 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2266 printf(" %d",constraints_idxs[j]); 2267 } 2268 printf("\n"); 2269 printf("number of cc: %d\n",constraints_n[i]); 2270 } 2271 for (i=0;i<n_vertices;i++) { 2272 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2273 } 2274 for (i=0;i<sub_schurs->n_subs;i++) { 2275 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2276 } 2277 #endif 2278 2279 max_size_of_constraint = 0; 2280 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2281 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2282 /* Change of basis */ 2283 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2284 if (pcbddc->use_change_of_basis) { 2285 PetscInt cum = n_vertices; 2286 for (i=0;i<sub_schurs->n_subs;i++) { 2287 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2288 for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) { 2289 ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr); 2290 } 2291 } 2292 cum += pcbddc->adaptive_constraints_n[i+n_vertices]; 2293 } 2294 } 2295 } 2296 pcbddc->local_primal_size = total_counts; 2297 pcbddc->local_primal_size_cc = total_counts_cc; 2298 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2299 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2300 2301 /* map constraints_idxs in boundary numbering */ 2302 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2303 if (i != constraints_idxs_ptr[total_counts_cc]) { 2304 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2305 } 2306 2307 /* Create constraint matrix */ 2308 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2309 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2310 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2311 2312 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2313 /* determine if a QR strategy is needed for change of basis */ 2314 qr_needed = PETSC_FALSE; 2315 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2316 total_primal_vertices=0; 2317 for (i=0;i<total_counts_cc;i++) { 2318 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2319 if (size_of_constraint == 1) { 2320 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2321 } else if (PetscBTLookup(change_basis,i)) { 2322 for (k=0;k<constraints_n[i];k++) { 2323 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2324 } 2325 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2326 PetscBTSet(qr_needed_idx,i); 2327 qr_needed = PETSC_TRUE; 2328 } 2329 } 2330 } 2331 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2332 pcbddc->n_vertices = total_primal_vertices; 2333 /* permute indices in order to have a sorted set of vertices */ 2334 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2335 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2336 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2337 2338 /* nonzero structure of constraint matrix */ 2339 /* and get reference dof for local constraints */ 2340 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2341 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2342 2343 j = total_primal_vertices; 2344 total_counts = total_primal_vertices; 2345 for (i=n_vertices;i<total_counts_cc;i++) { 2346 if (!PetscBTLookup(change_basis,i)) { 2347 pcbddc->local_primal_ref_node[i] = constraints_idxs[constraints_idxs_ptr[i]]; 2348 pcbddc->local_primal_ref_mult[i] = constraints_n[i]; 2349 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2350 for (k=0;k<constraints_n[i];k++) { 2351 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2352 nnz[j+k] = size_of_constraint; 2353 } 2354 j += constraints_n[i]; 2355 } 2356 } 2357 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2358 ierr = PetscFree(nnz);CHKERRQ(ierr); 2359 2360 /* set values in constraint matrix */ 2361 for (i=0;i<total_primal_vertices;i++) { 2362 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2363 } 2364 total_counts = total_primal_vertices; 2365 for (i=n_vertices;i<total_counts_cc;i++) { 2366 if (!PetscBTLookup(change_basis,i)) { 2367 PetscInt *cols; 2368 2369 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2370 cols = constraints_idxs+constraints_idxs_ptr[i]; 2371 for (k=0;k<constraints_n[i];k++) { 2372 PetscInt row = total_counts+k; 2373 PetscScalar *vals; 2374 2375 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2376 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2377 } 2378 total_counts += constraints_n[i]; 2379 } 2380 } 2381 /* assembling */ 2382 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2383 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384 2385 /* 2386 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2387 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2388 */ 2389 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2390 if (pcbddc->use_change_of_basis) { 2391 /* dual and primal dofs on a single cc */ 2392 PetscInt dual_dofs,primal_dofs; 2393 /* working stuff for GEQRF */ 2394 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2395 PetscBLASInt lqr_work; 2396 /* working stuff for UNGQR */ 2397 PetscScalar *gqr_work,lgqr_work_t; 2398 PetscBLASInt lgqr_work; 2399 /* working stuff for TRTRS */ 2400 PetscScalar *trs_rhs; 2401 PetscBLASInt Blas_NRHS; 2402 /* pointers for values insertion into change of basis matrix */ 2403 PetscInt *start_rows,*start_cols; 2404 PetscScalar *start_vals; 2405 /* working stuff for values insertion */ 2406 PetscBT is_primal; 2407 PetscInt *aux_primal_numbering_B; 2408 /* matrix sizes */ 2409 PetscInt global_size,local_size; 2410 /* temporary change of basis */ 2411 Mat localChangeOfBasisMatrix; 2412 /* extra space for debugging */ 2413 PetscScalar *dbg_work; 2414 2415 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2416 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2417 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2418 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2419 /* nonzeros for local mat */ 2420 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2421 for (i=0;i<pcis->n;i++) nnz[i]=1; 2422 for (i=n_vertices;i<total_counts_cc;i++) { 2423 if (PetscBTLookup(change_basis,i)) { 2424 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2425 if (PetscBTLookup(qr_needed_idx,i)) { 2426 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2427 } else { 2428 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2429 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2430 } 2431 } 2432 } 2433 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2434 ierr = PetscFree(nnz);CHKERRQ(ierr); 2435 /* Set initial identity in the matrix */ 2436 for (i=0;i<pcis->n;i++) { 2437 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2438 } 2439 2440 if (pcbddc->dbg_flag) { 2441 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2442 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2443 } 2444 2445 2446 /* Now we loop on the constraints which need a change of basis */ 2447 /* 2448 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2449 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2450 2451 Basic blocks of change of basis matrix T computed by 2452 2453 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2454 2455 | 1 0 ... 0 s_1/S | 2456 | 0 1 ... 0 s_2/S | 2457 | ... | 2458 | 0 ... 1 s_{n-1}/S | 2459 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2460 2461 with S = \sum_{i=1}^n s_i^2 2462 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2463 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2464 2465 - QR decomposition of constraints otherwise 2466 */ 2467 if (qr_needed) { 2468 /* space to store Q */ 2469 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2470 /* first we issue queries for optimal work */ 2471 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2472 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2473 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2474 lqr_work = -1; 2475 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2476 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2477 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2478 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2479 lgqr_work = -1; 2480 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2481 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2482 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2483 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2484 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2485 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2486 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2487 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2488 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2489 /* array to store scaling factors for reflectors */ 2490 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2491 /* array to store rhs and solution of triangular solver */ 2492 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2493 /* allocating workspace for check */ 2494 if (pcbddc->dbg_flag) { 2495 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2496 } 2497 } 2498 /* array to store whether a node is primal or not */ 2499 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2500 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2501 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2502 if (i != total_primal_vertices) { 2503 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2504 } 2505 for (i=0;i<total_primal_vertices;i++) { 2506 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2507 } 2508 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2509 2510 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2511 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2512 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2513 if (PetscBTLookup(change_basis,total_counts)) { 2514 /* get constraint info */ 2515 primal_dofs = constraints_n[total_counts]; 2516 dual_dofs = size_of_constraint-primal_dofs; 2517 2518 if (pcbddc->dbg_flag) { 2519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2520 } 2521 2522 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2523 2524 /* copy quadrature constraints for change of basis check */ 2525 if (pcbddc->dbg_flag) { 2526 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2527 } 2528 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2529 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2530 2531 /* compute QR decomposition of constraints */ 2532 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2533 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2534 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2535 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2536 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2537 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2538 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2539 2540 /* explictly compute R^-T */ 2541 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2542 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2543 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2544 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2546 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2547 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2548 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2549 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2551 2552 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2555 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2557 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2558 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2559 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2560 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2561 2562 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2563 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2564 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2565 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2566 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2567 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2568 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2569 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2571 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2572 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2573 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2574 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2575 2576 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2577 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2578 /* insert cols for primal dofs */ 2579 for (j=0;j<primal_dofs;j++) { 2580 start_vals = &qr_basis[j*size_of_constraint]; 2581 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2582 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2583 } 2584 /* insert cols for dual dofs */ 2585 for (j=0,k=0;j<dual_dofs;k++) { 2586 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2587 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2588 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2589 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2590 j++; 2591 } 2592 } 2593 2594 /* check change of basis */ 2595 if (pcbddc->dbg_flag) { 2596 PetscInt ii,jj; 2597 PetscBool valid_qr=PETSC_TRUE; 2598 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2599 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2600 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2602 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2604 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2605 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2606 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2607 for (jj=0;jj<size_of_constraint;jj++) { 2608 for (ii=0;ii<primal_dofs;ii++) { 2609 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2610 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2611 } 2612 } 2613 if (!valid_qr) { 2614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2615 for (jj=0;jj<size_of_constraint;jj++) { 2616 for (ii=0;ii<primal_dofs;ii++) { 2617 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2618 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2619 } 2620 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2621 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2622 } 2623 } 2624 } 2625 } else { 2626 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2627 } 2628 } 2629 } else { /* simple transformation block */ 2630 PetscInt row,col; 2631 PetscScalar val,norm; 2632 2633 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2634 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2635 for (j=0;j<size_of_constraint;j++) { 2636 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2637 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2638 if (!PetscBTLookup(is_primal,row_B)) { 2639 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2640 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2641 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2642 } else { 2643 for (k=0;k<size_of_constraint;k++) { 2644 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2645 if (row != col) { 2646 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2647 } else { 2648 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2649 } 2650 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2651 } 2652 } 2653 } 2654 if (pcbddc->dbg_flag) { 2655 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2656 } 2657 } 2658 } else { 2659 if (pcbddc->dbg_flag) { 2660 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2661 } 2662 } 2663 } 2664 2665 /* free workspace */ 2666 if (qr_needed) { 2667 if (pcbddc->dbg_flag) { 2668 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2669 } 2670 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2671 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2672 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2673 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2674 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2675 } 2676 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2677 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2678 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679 2680 /* assembling of global change of variable */ 2681 { 2682 Mat tmat; 2683 PetscInt bs; 2684 2685 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2686 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2687 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2688 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2689 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2690 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2691 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2692 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2693 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2694 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2695 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2696 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2697 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2698 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2699 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2700 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2701 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2702 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2703 } 2704 /* check */ 2705 if (pcbddc->dbg_flag) { 2706 PetscReal error; 2707 Vec x,x_change; 2708 2709 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2710 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2711 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2712 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2713 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2714 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2715 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2716 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2717 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2718 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2719 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2720 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2722 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2723 ierr = VecDestroy(&x);CHKERRQ(ierr); 2724 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2725 } 2726 2727 /* adapt sub_schurs computed (if any) */ 2728 if (pcbddc->use_deluxe_scaling) { 2729 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2730 if (sub_schurs->S_Ej_all) { 2731 Mat S_new,tmat; 2732 IS is_all_N; 2733 2734 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2735 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2736 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2737 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2738 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2739 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2740 sub_schurs->S_Ej_all = S_new; 2741 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2742 if (sub_schurs->sum_S_Ej_all) { 2743 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2744 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2745 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2746 sub_schurs->sum_S_Ej_all = S_new; 2747 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2748 } 2749 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2750 } 2751 } 2752 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2753 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2754 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2755 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2756 } 2757 2758 /* set up change of basis context */ 2759 if (pcbddc->ChangeOfBasisMatrix) { 2760 PCBDDCChange_ctx change_ctx; 2761 2762 if (!pcbddc->new_global_mat) { 2763 PetscInt global_size,local_size; 2764 2765 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2766 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2767 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2768 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2769 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2770 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2771 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2772 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2773 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2774 } else { 2775 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2776 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2777 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2778 } 2779 if (!pcbddc->user_ChangeOfBasisMatrix) { 2780 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2781 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2782 } else { 2783 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2784 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2785 } 2786 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2787 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2788 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790 } 2791 2792 /* check if a new primal space has been introduced */ 2793 pcbddc->new_primal_space_local = PETSC_TRUE; 2794 if (olocal_primal_size == pcbddc->local_primal_size) { 2795 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2796 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2797 if (!pcbddc->new_primal_space_local) { 2798 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2799 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2800 } 2801 } 2802 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2803 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2804 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2805 2806 /* flush dbg viewer */ 2807 if (pcbddc->dbg_flag) { 2808 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2809 } 2810 2811 /* free workspace */ 2812 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2813 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2814 if (!pcbddc->adaptive_selection) { 2815 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2816 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2817 } else { 2818 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2819 pcbddc->adaptive_constraints_idxs_ptr, 2820 pcbddc->adaptive_constraints_data_ptr, 2821 pcbddc->adaptive_constraints_idxs, 2822 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2823 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2824 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2825 } 2826 PetscFunctionReturn(0); 2827 } 2828 2829 #undef __FUNCT__ 2830 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2831 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2832 { 2833 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2834 PC_IS *pcis = (PC_IS*)pc->data; 2835 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2836 PetscInt ierr,i,vertex_size,N; 2837 PetscViewer viewer=pcbddc->dbg_viewer; 2838 2839 PetscFunctionBegin; 2840 /* Reset previously computed graph */ 2841 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2842 /* Init local Graph struct */ 2843 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2844 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2845 2846 /* Check validity of the csr graph passed in by the user */ 2847 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2848 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2849 } 2850 2851 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2852 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2853 PetscInt *xadj,*adjncy; 2854 PetscInt nvtxs; 2855 PetscBool flg_row=PETSC_FALSE; 2856 2857 if (pcbddc->use_local_adj) { 2858 2859 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2860 if (flg_row) { 2861 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2862 pcbddc->computed_rowadj = PETSC_TRUE; 2863 } 2864 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2865 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2866 IS is_dummy; 2867 ISLocalToGlobalMapping l2gmap_dummy; 2868 PetscInt j,sum; 2869 PetscInt *cxadj,*cadjncy; 2870 const PetscInt *idxs; 2871 PCBDDCGraph graph; 2872 PetscBT is_on_boundary; 2873 2874 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2875 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2876 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2877 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2878 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 2879 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2880 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2881 if (flg_row) { 2882 graph->xadj = xadj; 2883 graph->adjncy = adjncy; 2884 } 2885 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2886 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2887 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2888 2889 if (pcbddc->dbg_flag) { 2890 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 2891 for (i=0;i<graph->ncc;i++) { 2892 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2893 } 2894 } 2895 2896 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2897 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2898 for (i=0;i<pcis->n_B;i++) { 2899 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2900 } 2901 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2902 2903 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2904 sum = 0; 2905 for (i=0;i<graph->ncc;i++) { 2906 PetscInt sizecc = 0; 2907 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2908 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2909 sizecc++; 2910 } 2911 } 2912 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2913 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2914 cxadj[graph->queue[j]] = sizecc; 2915 } 2916 } 2917 sum += sizecc*sizecc; 2918 } 2919 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2920 sum = 0; 2921 for (i=0;i<pcis->n;i++) { 2922 PetscInt temp = cxadj[i]; 2923 cxadj[i] = sum; 2924 sum += temp; 2925 } 2926 cxadj[pcis->n] = sum; 2927 for (i=0;i<graph->ncc;i++) { 2928 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2929 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2930 PetscInt k,sizecc = 0; 2931 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2932 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2933 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2934 sizecc++; 2935 } 2936 } 2937 } 2938 } 2939 } 2940 if (sum) { 2941 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2942 } else { 2943 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2944 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2945 } 2946 graph->xadj = 0; 2947 graph->adjncy = 0; 2948 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2949 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2950 } 2951 } 2952 if (pcbddc->dbg_flag) { 2953 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2954 } 2955 2956 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 2957 vertex_size = 1; 2958 if (pcbddc->user_provided_isfordofs) { 2959 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 2960 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2961 for (i=0;i<pcbddc->n_ISForDofs;i++) { 2962 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2963 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 2964 } 2965 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 2966 pcbddc->n_ISForDofs = 0; 2967 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 2968 } 2969 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2970 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2971 } else { 2972 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 2973 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 2974 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 2975 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 2976 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 2977 } 2978 } 2979 } 2980 2981 /* Setup of Graph */ 2982 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 2983 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 2984 } 2985 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 2986 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 2987 } 2988 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 2989 2990 /* Graph's connected components analysis */ 2991 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2992 2993 /* print some info to stdout */ 2994 if (pcbddc->dbg_flag) { 2995 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 2996 } 2997 2998 /* mark topography has done */ 2999 pcbddc->recompute_topography = PETSC_FALSE; 3000 PetscFunctionReturn(0); 3001 } 3002 3003 #undef __FUNCT__ 3004 #define __FUNCT__ "PCBDDCSubsetNumbering" 3005 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3006 { 3007 Vec local_vec,global_vec; 3008 IS seqis,paris; 3009 VecScatter scatter_ctx; 3010 PetscScalar *array; 3011 PetscInt *temp_global_dofs; 3012 PetscScalar globalsum; 3013 PetscInt i,j,s; 3014 PetscInt nlocals,first_index,old_index,max_local,max_global; 3015 PetscMPIInt rank_prec_comm,size_prec_comm; 3016 PetscInt *dof_sizes,*dof_displs; 3017 PetscBool first_found; 3018 PetscErrorCode ierr; 3019 3020 PetscFunctionBegin; 3021 /* mpi buffers */ 3022 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3023 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3024 j = ( !rank_prec_comm ? size_prec_comm : 0); 3025 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3026 /* get maximum size of subset */ 3027 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3028 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3029 max_local = 0; 3030 for (i=0;i<n_local_dofs;i++) { 3031 if (max_local < temp_global_dofs[i] ) { 3032 max_local = temp_global_dofs[i]; 3033 } 3034 } 3035 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3036 max_global++; 3037 max_local = 0; 3038 for (i=0;i<n_local_dofs;i++) { 3039 if (max_local < local_dofs[i] ) { 3040 max_local = local_dofs[i]; 3041 } 3042 } 3043 max_local++; 3044 /* allocate workspace */ 3045 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3046 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3047 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3048 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3049 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3050 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3051 /* create scatter */ 3052 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3053 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3054 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3055 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3056 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3057 /* init array */ 3058 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3059 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3060 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3061 if (local_dofs_mult) { 3062 for (i=0;i<n_local_dofs;i++) { 3063 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3064 } 3065 } else { 3066 for (i=0;i<n_local_dofs;i++) { 3067 array[local_dofs[i]]=1.0; 3068 } 3069 } 3070 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3071 /* scatter into global vec and get total number of global dofs */ 3072 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3073 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3074 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3075 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3076 /* Fill global_vec with cumulative function for global numbering */ 3077 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3078 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3079 nlocals = 0; 3080 first_index = -1; 3081 first_found = PETSC_FALSE; 3082 for (i=0;i<s;i++) { 3083 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3084 first_found = PETSC_TRUE; 3085 first_index = i; 3086 } 3087 nlocals += (PetscInt)PetscRealPart(array[i]); 3088 } 3089 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3090 if (!rank_prec_comm) { 3091 dof_displs[0]=0; 3092 for (i=1;i<size_prec_comm;i++) { 3093 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3094 } 3095 } 3096 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3097 if (first_found) { 3098 array[first_index] += (PetscScalar)nlocals; 3099 old_index = first_index; 3100 for (i=first_index+1;i<s;i++) { 3101 if (PetscRealPart(array[i]) > 0.1) { 3102 array[i] += array[old_index]; 3103 old_index = i; 3104 } 3105 } 3106 } 3107 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3108 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3109 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3110 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3111 /* get global ordering of local dofs */ 3112 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3113 if (local_dofs_mult) { 3114 for (i=0;i<n_local_dofs;i++) { 3115 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3116 } 3117 } else { 3118 for (i=0;i<n_local_dofs;i++) { 3119 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3120 } 3121 } 3122 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3123 /* free workspace */ 3124 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3125 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3126 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3127 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3128 /* return pointer to global ordering of local dofs */ 3129 *global_numbering_subset = temp_global_dofs; 3130 PetscFunctionReturn(0); 3131 } 3132 3133 #undef __FUNCT__ 3134 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3135 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3136 { 3137 PetscInt i,j; 3138 PetscScalar *alphas; 3139 PetscErrorCode ierr; 3140 3141 PetscFunctionBegin; 3142 /* this implements stabilized Gram-Schmidt */ 3143 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3144 for (i=0;i<n;i++) { 3145 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3146 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3147 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3148 } 3149 ierr = PetscFree(alphas);CHKERRQ(ierr); 3150 PetscFunctionReturn(0); 3151 } 3152 3153 #undef __FUNCT__ 3154 #define __FUNCT__ "MatISGetSubassemblingPattern" 3155 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3156 { 3157 Mat subdomain_adj; 3158 IS new_ranks,ranks_send_to; 3159 MatPartitioning partitioner; 3160 Mat_IS *matis; 3161 PetscInt n_neighs,*neighs,*n_shared,**shared; 3162 PetscInt prank; 3163 PetscMPIInt size,rank,color; 3164 PetscInt *xadj,*adjncy,*oldranks; 3165 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3166 PetscInt i,local_size,threshold=0; 3167 PetscErrorCode ierr; 3168 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3169 PetscSubcomm subcomm; 3170 3171 PetscFunctionBegin; 3172 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3173 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3174 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3175 3176 /* Get info on mapping */ 3177 matis = (Mat_IS*)(mat->data); 3178 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3179 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3180 3181 /* build local CSR graph of subdomains' connectivity */ 3182 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3183 xadj[0] = 0; 3184 xadj[1] = PetscMax(n_neighs-1,0); 3185 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3186 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3187 3188 if (threshold) { 3189 PetscInt xadj_count = 0; 3190 for (i=1;i<n_neighs;i++) { 3191 if (n_shared[i] > threshold) { 3192 adjncy[xadj_count] = neighs[i]; 3193 adjncy_wgt[xadj_count] = n_shared[i]; 3194 xadj_count++; 3195 } 3196 } 3197 xadj[1] = xadj_count; 3198 } else { 3199 if (xadj[1]) { 3200 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3201 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3202 } 3203 } 3204 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3205 if (use_square) { 3206 for (i=0;i<xadj[1];i++) { 3207 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3208 } 3209 } 3210 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3211 3212 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3213 3214 /* 3215 Restrict work on active processes only. 3216 */ 3217 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3218 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3219 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3220 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3221 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3222 if (color) { 3223 ierr = PetscFree(xadj);CHKERRQ(ierr); 3224 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3225 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3226 } else { 3227 PetscInt coarsening_ratio; 3228 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3229 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3230 prank = rank; 3231 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3232 /* 3233 for (i=0;i<size;i++) { 3234 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3235 } 3236 */ 3237 for (i=0;i<xadj[1];i++) { 3238 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3239 } 3240 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3241 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3242 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3243 3244 /* Partition */ 3245 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3246 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3247 if (use_vwgt) { 3248 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3249 v_wgt[0] = local_size; 3250 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3251 } 3252 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3253 coarsening_ratio = size/n_subdomains; 3254 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3255 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3256 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3257 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3258 3259 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3260 if (contiguous) { 3261 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3262 } else { 3263 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3264 } 3265 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3266 /* clean up */ 3267 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3268 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3269 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3270 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3271 } 3272 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3273 3274 /* assemble parallel IS for sends */ 3275 i = 1; 3276 if (color) i=0; 3277 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3278 3279 /* get back IS */ 3280 *is_sends = ranks_send_to; 3281 PetscFunctionReturn(0); 3282 } 3283 3284 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3285 3286 #undef __FUNCT__ 3287 #define __FUNCT__ "MatISSubassemble" 3288 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3289 { 3290 Mat local_mat; 3291 Mat_IS *matis; 3292 IS is_sends_internal; 3293 PetscInt rows,cols,new_local_rows; 3294 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3295 PetscBool ismatis,isdense,newisdense,destroy_mat; 3296 ISLocalToGlobalMapping l2gmap; 3297 PetscInt* l2gmap_indices; 3298 const PetscInt* is_indices; 3299 MatType new_local_type; 3300 /* buffers */ 3301 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3302 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3303 PetscInt *recv_buffer_idxs_local; 3304 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3305 /* MPI */ 3306 MPI_Comm comm,comm_n; 3307 PetscSubcomm subcomm; 3308 PetscMPIInt n_sends,n_recvs,commsize; 3309 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3310 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3311 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3312 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3313 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3314 PetscErrorCode ierr; 3315 3316 PetscFunctionBegin; 3317 /* TODO: add missing checks */ 3318 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3319 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3320 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3321 PetscValidLogicalCollectiveInt(mat,nis,7); 3322 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3323 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3324 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3325 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3326 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3327 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3328 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3329 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3330 PetscInt mrows,mcols,mnrows,mncols; 3331 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3332 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3333 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3334 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3335 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3336 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3337 } 3338 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3339 PetscValidLogicalCollectiveInt(mat,bs,0); 3340 /* prepare IS for sending if not provided */ 3341 if (!is_sends) { 3342 PetscBool pcontig = PETSC_TRUE; 3343 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3344 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3345 } else { 3346 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3347 is_sends_internal = is_sends; 3348 } 3349 3350 /* get pointer of MATIS data */ 3351 matis = (Mat_IS*)mat->data; 3352 3353 /* get comm */ 3354 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3355 3356 /* compute number of sends */ 3357 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3358 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3359 3360 /* compute number of receives */ 3361 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3362 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3363 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3364 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3365 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3366 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3367 ierr = PetscFree(iflags);CHKERRQ(ierr); 3368 3369 /* restrict comm if requested */ 3370 subcomm = 0; 3371 destroy_mat = PETSC_FALSE; 3372 if (restrict_comm) { 3373 PetscMPIInt color,subcommsize; 3374 3375 color = 0; 3376 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3377 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3378 subcommsize = commsize - subcommsize; 3379 /* check if reuse has been requested */ 3380 if (reuse == MAT_REUSE_MATRIX) { 3381 if (*mat_n) { 3382 PetscMPIInt subcommsize2; 3383 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3384 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3385 comm_n = PetscObjectComm((PetscObject)*mat_n); 3386 } else { 3387 comm_n = PETSC_COMM_SELF; 3388 } 3389 } else { /* MAT_INITIAL_MATRIX */ 3390 PetscMPIInt rank; 3391 3392 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3393 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3394 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3395 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3396 comm_n = PetscSubcommChild(subcomm); 3397 } 3398 /* flag to destroy *mat_n if not significative */ 3399 if (color) destroy_mat = PETSC_TRUE; 3400 } else { 3401 comm_n = comm; 3402 } 3403 3404 /* prepare send/receive buffers */ 3405 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3406 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3407 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3408 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3409 if (nis) { 3410 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3411 } 3412 3413 /* Get data from local matrices */ 3414 if (!isdense) { 3415 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3416 /* TODO: See below some guidelines on how to prepare the local buffers */ 3417 /* 3418 send_buffer_vals should contain the raw values of the local matrix 3419 send_buffer_idxs should contain: 3420 - MatType_PRIVATE type 3421 - PetscInt size_of_l2gmap 3422 - PetscInt global_row_indices[size_of_l2gmap] 3423 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3424 */ 3425 } else { 3426 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3427 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3428 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3429 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3430 send_buffer_idxs[1] = i; 3431 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3432 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3433 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3434 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3435 for (i=0;i<n_sends;i++) { 3436 ilengths_vals[is_indices[i]] = len*len; 3437 ilengths_idxs[is_indices[i]] = len+2; 3438 } 3439 } 3440 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3441 /* additional is (if any) */ 3442 if (nis) { 3443 PetscMPIInt psum; 3444 PetscInt j; 3445 for (j=0,psum=0;j<nis;j++) { 3446 PetscInt plen; 3447 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3448 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3449 psum += len+1; /* indices + lenght */ 3450 } 3451 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3452 for (j=0,psum=0;j<nis;j++) { 3453 PetscInt plen; 3454 const PetscInt *is_array_idxs; 3455 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3456 send_buffer_idxs_is[psum] = plen; 3457 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3458 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3459 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3460 psum += plen+1; /* indices + lenght */ 3461 } 3462 for (i=0;i<n_sends;i++) { 3463 ilengths_idxs_is[is_indices[i]] = psum; 3464 } 3465 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3466 } 3467 3468 buf_size_idxs = 0; 3469 buf_size_vals = 0; 3470 buf_size_idxs_is = 0; 3471 for (i=0;i<n_recvs;i++) { 3472 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3473 buf_size_vals += (PetscInt)olengths_vals[i]; 3474 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3475 } 3476 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3477 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3479 3480 /* get new tags for clean communications */ 3481 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3482 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3483 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3484 3485 /* allocate for requests */ 3486 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3487 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3488 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3489 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3490 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3491 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3492 3493 /* communications */ 3494 ptr_idxs = recv_buffer_idxs; 3495 ptr_vals = recv_buffer_vals; 3496 ptr_idxs_is = recv_buffer_idxs_is; 3497 for (i=0;i<n_recvs;i++) { 3498 source_dest = onodes[i]; 3499 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3500 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3501 ptr_idxs += olengths_idxs[i]; 3502 ptr_vals += olengths_vals[i]; 3503 if (nis) { 3504 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3505 ptr_idxs_is += olengths_idxs_is[i]; 3506 } 3507 } 3508 for (i=0;i<n_sends;i++) { 3509 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3510 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3511 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3512 if (nis) { 3513 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3514 } 3515 } 3516 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3517 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3518 3519 /* assemble new l2g map */ 3520 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3521 ptr_idxs = recv_buffer_idxs; 3522 new_local_rows = 0; 3523 for (i=0;i<n_recvs;i++) { 3524 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3525 ptr_idxs += olengths_idxs[i]; 3526 } 3527 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3528 ptr_idxs = recv_buffer_idxs; 3529 new_local_rows = 0; 3530 for (i=0;i<n_recvs;i++) { 3531 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3532 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3533 ptr_idxs += olengths_idxs[i]; 3534 } 3535 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3536 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3537 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3538 3539 /* infer new local matrix type from received local matrices type */ 3540 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3541 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3542 if (n_recvs) { 3543 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3544 ptr_idxs = recv_buffer_idxs; 3545 for (i=0;i<n_recvs;i++) { 3546 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3547 new_local_type_private = MATAIJ_PRIVATE; 3548 break; 3549 } 3550 ptr_idxs += olengths_idxs[i]; 3551 } 3552 switch (new_local_type_private) { 3553 case MATDENSE_PRIVATE: 3554 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3555 new_local_type = MATSEQAIJ; 3556 bs = 1; 3557 } else { /* if I receive only 1 dense matrix */ 3558 new_local_type = MATSEQDENSE; 3559 bs = 1; 3560 } 3561 break; 3562 case MATAIJ_PRIVATE: 3563 new_local_type = MATSEQAIJ; 3564 bs = 1; 3565 break; 3566 case MATBAIJ_PRIVATE: 3567 new_local_type = MATSEQBAIJ; 3568 break; 3569 case MATSBAIJ_PRIVATE: 3570 new_local_type = MATSEQSBAIJ; 3571 break; 3572 default: 3573 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3574 break; 3575 } 3576 } else { /* by default, new_local_type is seqdense */ 3577 new_local_type = MATSEQDENSE; 3578 bs = 1; 3579 } 3580 3581 /* create MATIS object if needed */ 3582 if (reuse == MAT_INITIAL_MATRIX) { 3583 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3584 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3585 } else { 3586 /* it also destroys the local matrices */ 3587 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3588 } 3589 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3590 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3591 3592 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3593 3594 /* Global to local map of received indices */ 3595 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3596 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3597 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3598 3599 /* restore attributes -> type of incoming data and its size */ 3600 buf_size_idxs = 0; 3601 for (i=0;i<n_recvs;i++) { 3602 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3603 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3604 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3605 } 3606 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3607 3608 /* set preallocation */ 3609 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3610 if (!newisdense) { 3611 PetscInt *new_local_nnz=0; 3612 3613 ptr_vals = recv_buffer_vals; 3614 ptr_idxs = recv_buffer_idxs_local; 3615 if (n_recvs) { 3616 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3617 } 3618 for (i=0;i<n_recvs;i++) { 3619 PetscInt j; 3620 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3621 for (j=0;j<*(ptr_idxs+1);j++) { 3622 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3623 } 3624 } else { 3625 /* TODO */ 3626 } 3627 ptr_idxs += olengths_idxs[i]; 3628 } 3629 if (new_local_nnz) { 3630 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3631 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3632 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3633 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3634 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3635 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3636 } else { 3637 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3638 } 3639 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3640 } else { 3641 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3642 } 3643 3644 /* set values */ 3645 ptr_vals = recv_buffer_vals; 3646 ptr_idxs = recv_buffer_idxs_local; 3647 for (i=0;i<n_recvs;i++) { 3648 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3649 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3650 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3651 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3652 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3653 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3654 } else { 3655 /* TODO */ 3656 } 3657 ptr_idxs += olengths_idxs[i]; 3658 ptr_vals += olengths_vals[i]; 3659 } 3660 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3661 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3662 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3663 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3664 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3665 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3666 3667 #if 0 3668 if (!restrict_comm) { /* check */ 3669 Vec lvec,rvec; 3670 PetscReal infty_error; 3671 3672 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3673 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3674 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3675 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3676 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3677 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3678 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3679 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3680 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3681 } 3682 #endif 3683 3684 /* assemble new additional is (if any) */ 3685 if (nis) { 3686 PetscInt **temp_idxs,*count_is,j,psum; 3687 3688 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3689 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3690 ptr_idxs = recv_buffer_idxs_is; 3691 psum = 0; 3692 for (i=0;i<n_recvs;i++) { 3693 for (j=0;j<nis;j++) { 3694 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3695 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3696 psum += plen; 3697 ptr_idxs += plen+1; /* shift pointer to received data */ 3698 } 3699 } 3700 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3701 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3702 for (i=1;i<nis;i++) { 3703 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3704 } 3705 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3706 ptr_idxs = recv_buffer_idxs_is; 3707 for (i=0;i<n_recvs;i++) { 3708 for (j=0;j<nis;j++) { 3709 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3710 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3711 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3712 ptr_idxs += plen+1; /* shift pointer to received data */ 3713 } 3714 } 3715 for (i=0;i<nis;i++) { 3716 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3717 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3718 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3719 } 3720 ierr = PetscFree(count_is);CHKERRQ(ierr); 3721 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3722 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3723 } 3724 /* free workspace */ 3725 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3726 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3727 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3728 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3729 if (isdense) { 3730 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3731 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3732 } else { 3733 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3734 } 3735 if (nis) { 3736 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3737 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3738 } 3739 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3740 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3741 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3742 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3743 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3744 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3745 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3746 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3747 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3748 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3749 ierr = PetscFree(onodes);CHKERRQ(ierr); 3750 if (nis) { 3751 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3752 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3753 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3754 } 3755 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3756 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3757 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3758 for (i=0;i<nis;i++) { 3759 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3760 } 3761 } 3762 PetscFunctionReturn(0); 3763 } 3764 3765 /* temporary hack into ksp private data structure */ 3766 #include <petsc/private/kspimpl.h> 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3770 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3771 { 3772 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3773 PC_IS *pcis = (PC_IS*)pc->data; 3774 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3775 MatNullSpace CoarseNullSpace=NULL; 3776 ISLocalToGlobalMapping coarse_islg; 3777 IS coarse_is,*isarray; 3778 PetscInt i,im_active=-1,active_procs=-1; 3779 PetscInt nis,nisdofs,nisneu; 3780 PC pc_temp; 3781 PCType coarse_pc_type; 3782 KSPType coarse_ksp_type; 3783 PetscBool multilevel_requested,multilevel_allowed; 3784 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3785 Mat t_coarse_mat_is; 3786 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3787 PetscMPIInt all_procs; 3788 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3789 PetscBool compute_vecs = PETSC_FALSE; 3790 PetscScalar *array; 3791 PetscErrorCode ierr; 3792 3793 PetscFunctionBegin; 3794 /* Assign global numbering to coarse dofs */ 3795 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3796 PetscInt ocoarse_size; 3797 compute_vecs = PETSC_TRUE; 3798 ocoarse_size = pcbddc->coarse_size; 3799 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3800 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3801 /* see if we can avoid some work */ 3802 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3803 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3804 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3805 coarse_reuse = PETSC_FALSE; 3806 } else { /* we can safely reuse already computed coarse matrix */ 3807 coarse_reuse = PETSC_TRUE; 3808 } 3809 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3810 coarse_reuse = PETSC_FALSE; 3811 } 3812 /* reset any subassembling information */ 3813 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3814 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3815 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3816 coarse_reuse = PETSC_TRUE; 3817 } 3818 3819 /* count "active" (i.e. with positive local size) and "void" processes */ 3820 im_active = !!(pcis->n); 3821 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3822 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3823 void_procs = all_procs-active_procs; 3824 csin_type_simple = PETSC_TRUE; 3825 redist = PETSC_FALSE; 3826 if (pcbddc->current_level && void_procs) { 3827 csin_ml = PETSC_TRUE; 3828 ncoarse_ml = void_procs; 3829 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3830 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3831 csin_ds = PETSC_TRUE; 3832 ncoarse_ds = pcbddc->redistribute_coarse; 3833 redist = PETSC_TRUE; 3834 } else { 3835 csin_ds = PETSC_TRUE; 3836 ncoarse_ds = active_procs; 3837 redist = PETSC_TRUE; 3838 } 3839 } else { 3840 csin_ml = PETSC_FALSE; 3841 ncoarse_ml = all_procs; 3842 if (void_procs) { 3843 csin_ds = PETSC_TRUE; 3844 ncoarse_ds = void_procs; 3845 csin_type_simple = PETSC_FALSE; 3846 } else { 3847 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3848 csin_ds = PETSC_TRUE; 3849 ncoarse_ds = pcbddc->redistribute_coarse; 3850 redist = PETSC_TRUE; 3851 } else { 3852 csin_ds = PETSC_FALSE; 3853 ncoarse_ds = all_procs; 3854 } 3855 } 3856 } 3857 3858 /* 3859 test if we can go multilevel: three conditions must be satisfied: 3860 - we have not exceeded the number of levels requested 3861 - we can actually subassemble the active processes 3862 - we can find a suitable number of MPI processes where we can place the subassembled problem 3863 */ 3864 multilevel_allowed = PETSC_FALSE; 3865 multilevel_requested = PETSC_FALSE; 3866 if (pcbddc->current_level < pcbddc->max_levels) { 3867 multilevel_requested = PETSC_TRUE; 3868 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3869 multilevel_allowed = PETSC_FALSE; 3870 } else { 3871 multilevel_allowed = PETSC_TRUE; 3872 } 3873 } 3874 /* determine number of process partecipating to coarse solver */ 3875 if (multilevel_allowed) { 3876 ncoarse = ncoarse_ml; 3877 csin = csin_ml; 3878 redist = PETSC_FALSE; 3879 } else { 3880 ncoarse = ncoarse_ds; 3881 csin = csin_ds; 3882 } 3883 3884 /* creates temporary l2gmap and IS for coarse indexes */ 3885 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3886 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3887 3888 /* creates temporary MATIS object for coarse matrix */ 3889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3890 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3891 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3892 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3893 #if 0 3894 { 3895 PetscViewer viewer; 3896 char filename[256]; 3897 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3898 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3899 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3900 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3901 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3902 } 3903 #endif 3904 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3905 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3906 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3909 3910 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3911 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3912 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3913 const PetscInt *idxs; 3914 ISLocalToGlobalMapping tmap; 3915 3916 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3917 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3918 /* allocate space for temporary storage */ 3919 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3920 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3921 /* allocate for IS array */ 3922 nisdofs = pcbddc->n_ISForDofsLocal; 3923 nisneu = !!pcbddc->NeumannBoundariesLocal; 3924 nis = nisdofs + nisneu; 3925 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3926 /* dofs splitting */ 3927 for (i=0;i<nisdofs;i++) { 3928 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3929 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3930 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3931 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3932 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3933 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3934 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3935 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3936 } 3937 /* neumann boundaries */ 3938 if (pcbddc->NeumannBoundariesLocal) { 3939 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3940 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3941 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3942 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3943 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3944 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3945 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3946 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3947 } 3948 /* free memory */ 3949 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3950 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3951 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3952 } else { 3953 nis = 0; 3954 nisdofs = 0; 3955 nisneu = 0; 3956 isarray = NULL; 3957 } 3958 /* destroy no longer needed map */ 3959 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3960 3961 /* restrict on coarse candidates (if needed) */ 3962 coarse_mat_is = NULL; 3963 if (csin) { 3964 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 3965 if (redist) { 3966 PetscMPIInt rank; 3967 PetscInt spc,n_spc_p1,dest[1],destsize; 3968 3969 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3970 spc = active_procs/ncoarse; 3971 n_spc_p1 = active_procs%ncoarse; 3972 if (im_active) { 3973 destsize = 1; 3974 if (rank > n_spc_p1*(spc+1)-1) { 3975 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 3976 } else { 3977 dest[0] = rank/(spc+1); 3978 } 3979 } else { 3980 destsize = 0; 3981 } 3982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3983 } else if (csin_type_simple) { 3984 PetscMPIInt rank; 3985 PetscInt issize,isidx; 3986 3987 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 3988 if (im_active) { 3989 issize = 1; 3990 isidx = (PetscInt)rank; 3991 } else { 3992 issize = 0; 3993 isidx = -1; 3994 } 3995 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3996 } else { /* get a suitable subassembling pattern from MATIS code */ 3997 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3998 } 3999 4000 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4001 if (!redist || ncoarse <= void_procs) { 4002 PetscInt ncoarse_cand,tissize,*nisindices; 4003 PetscInt *coarse_candidates; 4004 const PetscInt* tisindices; 4005 4006 /* get coarse candidates' ranks in pc communicator */ 4007 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4008 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4009 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4010 if (!coarse_candidates[i]) { 4011 coarse_candidates[ncoarse_cand++]=i; 4012 } 4013 } 4014 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4015 4016 4017 if (pcbddc->dbg_flag) { 4018 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4019 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4020 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4021 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4022 for (i=0;i<ncoarse_cand;i++) { 4023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4024 } 4025 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4026 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4027 } 4028 /* shift the pattern on coarse candidates */ 4029 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4030 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4031 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4032 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4033 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4034 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4035 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4036 } 4037 if (pcbddc->dbg_flag) { 4038 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4039 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4040 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4041 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4042 } 4043 } 4044 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4045 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4046 } else { 4047 if (pcbddc->dbg_flag) { 4048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4050 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4051 } 4052 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4053 coarse_mat_is = t_coarse_mat_is; 4054 } 4055 4056 /* create local to global scatters for coarse problem */ 4057 if (compute_vecs) { 4058 PetscInt lrows; 4059 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4060 if (coarse_mat_is) { 4061 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4062 } else { 4063 lrows = 0; 4064 } 4065 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4066 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4067 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4068 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4069 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4070 } 4071 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4072 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4073 4074 /* set defaults for coarse KSP and PC */ 4075 if (multilevel_allowed) { 4076 coarse_ksp_type = KSPRICHARDSON; 4077 coarse_pc_type = PCBDDC; 4078 } else { 4079 coarse_ksp_type = KSPPREONLY; 4080 coarse_pc_type = PCREDUNDANT; 4081 } 4082 4083 /* print some info if requested */ 4084 if (pcbddc->dbg_flag) { 4085 if (!multilevel_allowed) { 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4087 if (multilevel_requested) { 4088 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4089 } else if (pcbddc->max_levels) { 4090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4091 } 4092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4093 } 4094 } 4095 4096 /* create the coarse KSP object only once with defaults */ 4097 if (coarse_mat_is) { 4098 MatReuse coarse_mat_reuse; 4099 PetscViewer dbg_viewer = NULL; 4100 if (pcbddc->dbg_flag) { 4101 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4102 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4103 } 4104 if (!pcbddc->coarse_ksp) { 4105 char prefix[256],str_level[16]; 4106 size_t len; 4107 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4108 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4109 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4110 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4111 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4112 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4113 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4114 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4115 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4116 /* prefix */ 4117 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4118 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4119 if (!pcbddc->current_level) { 4120 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4121 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4122 } else { 4123 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4124 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4125 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4126 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4127 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4128 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4129 } 4130 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4131 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4132 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4133 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4134 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4135 /* allow user customization */ 4136 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4137 } 4138 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4139 if (nisdofs) { 4140 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4141 for (i=0;i<nisdofs;i++) { 4142 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4143 } 4144 } 4145 if (nisneu) { 4146 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4147 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4148 } 4149 4150 /* get some info after set from options */ 4151 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4152 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4153 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4154 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4155 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4156 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4157 isbddc = PETSC_FALSE; 4158 } 4159 if (isredundant) { 4160 KSP inner_ksp; 4161 PC inner_pc; 4162 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4163 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4164 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4165 } 4166 4167 /* assemble coarse matrix */ 4168 if (coarse_reuse) { 4169 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4170 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4171 coarse_mat_reuse = MAT_REUSE_MATRIX; 4172 } else { 4173 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4174 } 4175 if (isbddc || isnn) { 4176 if (pcbddc->coarsening_ratio > 1) { 4177 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4178 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4179 if (pcbddc->dbg_flag) { 4180 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4181 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4182 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4183 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4184 } 4185 } 4186 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4187 } else { 4188 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4189 coarse_mat = coarse_mat_is; 4190 } 4191 } else { 4192 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4193 } 4194 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4195 4196 /* propagate symmetry info to coarse matrix */ 4197 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4198 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4199 4200 /* set operators */ 4201 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4202 if (pcbddc->dbg_flag) { 4203 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4204 } 4205 } else { /* processes non partecipating to coarse solver (if any) */ 4206 coarse_mat = 0; 4207 } 4208 ierr = PetscFree(isarray);CHKERRQ(ierr); 4209 #if 0 4210 { 4211 PetscViewer viewer; 4212 char filename[256]; 4213 sprintf(filename,"coarse_mat.m"); 4214 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4215 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4216 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4217 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4218 } 4219 #endif 4220 4221 /* Compute coarse null space (special handling by BDDC only) */ 4222 if (pcbddc->NullSpace) { 4223 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4224 } 4225 4226 if (pcbddc->coarse_ksp) { 4227 Vec crhs,csol; 4228 PetscBool ispreonly; 4229 if (CoarseNullSpace) { 4230 if (isbddc) { 4231 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4232 } else { 4233 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4234 } 4235 } 4236 /* setup coarse ksp */ 4237 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4238 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4239 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4240 /* hack */ 4241 if (!csol) { 4242 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4243 } 4244 if (!crhs) { 4245 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4246 } 4247 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4248 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4249 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4250 KSP check_ksp; 4251 KSPType check_ksp_type; 4252 PC check_pc; 4253 Vec check_vec,coarse_vec; 4254 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4255 PetscInt its; 4256 PetscBool compute_eigs; 4257 PetscReal *eigs_r,*eigs_c; 4258 PetscInt neigs; 4259 const char *prefix; 4260 4261 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4262 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4263 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4264 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4265 if (ispreonly) { 4266 check_ksp_type = KSPPREONLY; 4267 compute_eigs = PETSC_FALSE; 4268 } else { 4269 check_ksp_type = KSPGMRES; 4270 compute_eigs = PETSC_TRUE; 4271 } 4272 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4273 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4274 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4275 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4276 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4277 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4278 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4279 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4280 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4281 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4282 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4283 /* create random vec */ 4284 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4285 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4286 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4287 if (CoarseNullSpace) { 4288 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4289 } 4290 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4291 /* solve coarse problem */ 4292 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4293 if (CoarseNullSpace) { 4294 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4295 } 4296 /* set eigenvalue estimation if preonly has not been requested */ 4297 if (compute_eigs) { 4298 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4299 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4300 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4301 lambda_max = eigs_r[neigs-1]; 4302 lambda_min = eigs_r[0]; 4303 if (pcbddc->use_coarse_estimates) { 4304 if (lambda_max>lambda_min) { 4305 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4306 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4307 } 4308 } 4309 } 4310 4311 /* check coarse problem residual error */ 4312 if (pcbddc->dbg_flag) { 4313 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4314 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4315 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4316 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4317 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4318 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4319 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4320 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4321 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4322 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4323 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4324 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4325 if (compute_eigs) { 4326 PetscReal lambda_max_s,lambda_min_s; 4327 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4328 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4329 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4330 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4331 for (i=0;i<neigs;i++) { 4332 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4333 } 4334 } 4335 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4336 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4337 } 4338 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4339 if (compute_eigs) { 4340 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4341 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4342 } 4343 } 4344 } 4345 /* print additional info */ 4346 if (pcbddc->dbg_flag) { 4347 /* waits until all processes reaches this point */ 4348 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4349 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4350 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4351 } 4352 4353 /* free memory */ 4354 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4355 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4356 PetscFunctionReturn(0); 4357 } 4358 4359 #undef __FUNCT__ 4360 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4361 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4362 { 4363 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4364 PC_IS* pcis = (PC_IS*)pc->data; 4365 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4366 PetscInt i,local_size,coarse_size=0; 4367 PetscInt *local_primal_indices=NULL; 4368 PetscInt *t_local_primal_indices=NULL; 4369 PetscErrorCode ierr; 4370 4371 PetscFunctionBegin; 4372 /* Compute global number of coarse dofs */ 4373 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4374 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4375 } 4376 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4377 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4378 local_size = 0; 4379 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4380 PetscInt j; 4381 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4382 } 4383 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4384 if (local_size != pcbddc->local_primal_size) { 4385 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4386 } 4387 4388 /* check numbering */ 4389 if (pcbddc->dbg_flag) { 4390 PetscScalar coarsesum,*array; 4391 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4392 4393 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4394 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4395 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4396 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4397 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4398 for (i=0;i<pcbddc->local_primal_size;i++) { 4399 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4400 } 4401 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4402 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4403 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4404 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4405 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4406 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4407 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4408 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4409 for (i=0;i<pcis->n;i++) { 4410 if (array[i] == 1.0) { 4411 set_error = PETSC_TRUE; 4412 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4413 } 4414 } 4415 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4416 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4417 for (i=0;i<pcis->n;i++) { 4418 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4419 } 4420 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4421 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4422 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4423 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4424 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4425 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4426 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4427 PetscInt *gidxs; 4428 4429 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4430 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4431 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4434 for (i=0;i<pcbddc->local_primal_size;i++) { 4435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4436 } 4437 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4438 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4439 } 4440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4441 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4442 } 4443 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4444 /* get back data */ 4445 *coarse_size_n = coarse_size; 4446 *local_primal_indices_n = local_primal_indices; 4447 PetscFunctionReturn(0); 4448 } 4449 4450 #undef __FUNCT__ 4451 #define __FUNCT__ "PCBDDCGlobalToLocal" 4452 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4453 { 4454 IS localis_t; 4455 PetscInt i,lsize,*idxs,n; 4456 PetscScalar *vals; 4457 PetscErrorCode ierr; 4458 4459 PetscFunctionBegin; 4460 /* get indices in local ordering exploiting local to global map */ 4461 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4462 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4463 for (i=0;i<lsize;i++) vals[i] = 1.0; 4464 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4465 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4466 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4467 if (idxs) { /* multilevel guard */ 4468 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4469 } 4470 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4471 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4472 ierr = PetscFree(vals);CHKERRQ(ierr); 4473 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4474 /* now compute set in local ordering */ 4475 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4476 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4477 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4478 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4479 for (i=0,lsize=0;i<n;i++) { 4480 if (PetscRealPart(vals[i]) > 0.5) { 4481 lsize++; 4482 } 4483 } 4484 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4485 for (i=0,lsize=0;i<n;i++) { 4486 if (PetscRealPart(vals[i]) > 0.5) { 4487 idxs[lsize++] = i; 4488 } 4489 } 4490 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4491 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4492 *localis = localis_t; 4493 PetscFunctionReturn(0); 4494 } 4495 4496 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4497 #undef __FUNCT__ 4498 #define __FUNCT__ "PCBDDCMatMult_Private" 4499 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4500 { 4501 PCBDDCChange_ctx change_ctx; 4502 PetscErrorCode ierr; 4503 4504 PetscFunctionBegin; 4505 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4506 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4507 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4508 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4514 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4515 { 4516 PCBDDCChange_ctx change_ctx; 4517 PetscErrorCode ierr; 4518 4519 PetscFunctionBegin; 4520 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4521 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4522 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4523 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4524 PetscFunctionReturn(0); 4525 } 4526 4527 #undef __FUNCT__ 4528 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4529 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4530 { 4531 PC_IS *pcis=(PC_IS*)pc->data; 4532 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4533 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4534 Mat S_j; 4535 PetscInt *used_xadj,*used_adjncy; 4536 PetscBool free_used_adj; 4537 PetscErrorCode ierr; 4538 4539 PetscFunctionBegin; 4540 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4541 free_used_adj = PETSC_FALSE; 4542 if (pcbddc->sub_schurs_layers == -1) { 4543 used_xadj = NULL; 4544 used_adjncy = NULL; 4545 } else { 4546 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4547 used_xadj = pcbddc->mat_graph->xadj; 4548 used_adjncy = pcbddc->mat_graph->adjncy; 4549 } else if (pcbddc->computed_rowadj) { 4550 used_xadj = pcbddc->mat_graph->xadj; 4551 used_adjncy = pcbddc->mat_graph->adjncy; 4552 } else { 4553 PetscBool flg_row=PETSC_FALSE; 4554 const PetscInt *xadj,*adjncy; 4555 PetscInt nvtxs; 4556 4557 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4558 if (flg_row) { 4559 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4560 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4561 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4562 free_used_adj = PETSC_TRUE; 4563 } else { 4564 pcbddc->sub_schurs_layers = -1; 4565 used_xadj = NULL; 4566 used_adjncy = NULL; 4567 } 4568 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4569 } 4570 } 4571 4572 /* setup sub_schurs data */ 4573 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4574 if (!sub_schurs->use_mumps) { 4575 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4576 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4577 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4578 } else { 4579 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4580 } 4581 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4582 4583 /* free adjacency */ 4584 if (free_used_adj) { 4585 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4586 } 4587 PetscFunctionReturn(0); 4588 } 4589 4590 #undef __FUNCT__ 4591 #define __FUNCT__ "PCBDDCInitSubSchurs" 4592 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4593 { 4594 PC_IS *pcis=(PC_IS*)pc->data; 4595 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4596 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4597 PCBDDCGraph graph; 4598 PetscErrorCode ierr; 4599 4600 PetscFunctionBegin; 4601 /* attach interface graph for determining subsets */ 4602 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4603 IS verticesIS; 4604 4605 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4606 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4607 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4608 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4609 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4610 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4611 /* 4612 if (pcbddc->dbg_flag) { 4613 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4614 } 4615 */ 4616 } else { 4617 graph = pcbddc->mat_graph; 4618 } 4619 4620 /* sub_schurs init */ 4621 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4622 4623 /* free graph struct */ 4624 if (pcbddc->sub_schurs_rebuild) { 4625 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4626 } 4627 PetscFunctionReturn(0); 4628 } 4629