1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCAdaptiveSelection" 10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 14 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 15 PetscBLASInt *B_iwork,*B_ifail; 16 PetscScalar *work,lwork; 17 PetscScalar *St,*S,*eigv; 18 PetscScalar *Sarray,*Starray; 19 PetscReal *eigs,thresh; 20 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 21 PetscBool allocated_S_St; 22 #if defined(PETSC_USE_COMPLEX) 23 PetscReal *rwork; 24 #endif 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 if (!sub_schurs->use_mumps) { 29 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 30 } 31 32 if (pcbddc->dbg_flag) { 33 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 34 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 35 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 36 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 37 } 38 39 if (pcbddc->dbg_flag) { 40 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 41 } 42 43 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 44 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 45 } 46 47 /* max size of subsets */ 48 mss = 0; 49 for (i=0;i<sub_schurs->n_subs;i++) { 50 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 51 PetscInt subset_size; 52 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 53 mss = PetscMax(mss,subset_size); 54 } 55 } 56 57 /* min/max and threshold */ 58 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 59 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 60 nmax = PetscMax(nmin,nmax); 61 allocated_S_St = PETSC_FALSE; 62 if (nmin) { 63 allocated_S_St = PETSC_TRUE; 64 } 65 66 /* allocate lapack workspace */ 67 cum = cum2 = 0; 68 maxneigs = 0; 69 for (i=0;i<sub_schurs->n_subs;i++) { 70 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 71 PetscInt n,subset_size; 72 73 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 74 n = PetscMin(subset_size,nmax); 75 cum += subset_size; 76 cum2 += subset_size*n; 77 maxneigs = PetscMax(maxneigs,n); 78 } 79 } 80 if (mss) { 81 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 82 PetscBLASInt B_itype = 1; 83 PetscBLASInt B_N = mss; 84 PetscReal zero = 0.0; 85 PetscReal eps = 0.0; /* dlamch? */ 86 87 B_lwork = -1; 88 S = NULL; 89 St = NULL; 90 eigs = NULL; 91 eigv = NULL; 92 B_iwork = NULL; 93 B_ifail = NULL; 94 thresh = 1.0; 95 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 96 #if defined(PETSC_USE_COMPLEX) 97 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 98 #else 99 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 100 #endif 101 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 102 ierr = PetscFPTrapPop();CHKERRQ(ierr); 103 } else { 104 /* TODO */ 105 } 106 } else { 107 lwork = 0; 108 } 109 110 nv = 0; 111 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */ 112 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr); 113 } 114 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 115 if (allocated_S_St) { 116 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 117 } 118 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 119 #if defined(PETSC_USE_COMPLEX) 120 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 121 #endif 122 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 124 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 125 nv+cum,&pcbddc->adaptive_constraints_idxs, 126 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 127 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 128 129 maxneigs = 0; 130 cum = cum2 = cumarray = 0; 131 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 132 pcbddc->adaptive_constraints_data_ptr[0] = 0; 133 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 134 const PetscInt *idxs; 135 136 ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 137 for (cum=0;cum<nv;cum++) { 138 pcbddc->adaptive_constraints_n[cum] = 1; 139 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 140 pcbddc->adaptive_constraints_data[cum] = 1.0; 141 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 142 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 143 } 144 cum2 = cum; 145 ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr); 146 } 147 148 if (mss) { /* multilevel */ 149 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 150 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 151 } 152 153 for (i=0;i<sub_schurs->n_subs;i++) { 154 PetscInt j,subset_size; 155 156 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 157 if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) { 158 const PetscInt *idxs; 159 PetscReal infty = PETSC_MAX_REAL; 160 PetscInt eigs_start = 0; 161 PetscBLASInt B_N; 162 163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 165 if (sub_schurs->is_hermitian) { 166 PetscInt j; 167 for (j=0;j<subset_size;j++) { 168 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 169 } 170 for (j=0;j<subset_size;j++) { 171 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 172 } 173 } else { 174 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 175 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 176 } 177 } else { 178 S = Sarray + cumarray; 179 St = Starray + cumarray; 180 } 181 182 /* Threshold: this is an heuristic for edges */ 183 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 184 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 185 186 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 187 PetscBLASInt B_itype = 1; 188 PetscBLASInt B_IL, B_IU; 189 PetscReal eps = -1.0; /* dlamch? */ 190 PetscInt nmin_s; 191 192 /* ask for eigenvalues larger than thresh */ 193 if (pcbddc->dbg_flag) { 194 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 195 } 196 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 197 #if defined(PETSC_USE_COMPLEX) 198 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 199 #else 200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 201 #endif 202 ierr = PetscFPTrapPop();CHKERRQ(ierr); 203 if (B_ierr) { 204 if (B_ierr < 0 ) { 205 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 206 } else if (B_ierr <= B_N) { 207 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 208 } else { 209 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 210 } 211 } 212 213 if (B_neigs > nmax) { 214 if (pcbddc->dbg_flag) { 215 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 216 } 217 eigs_start = B_neigs -nmax; 218 B_neigs = nmax; 219 } 220 221 nmin_s = PetscMin(nmin,B_N); 222 if (B_neigs < nmin_s) { 223 PetscBLASInt B_neigs2; 224 225 B_IU = B_N - B_neigs; 226 B_IL = B_N - nmin_s + 1; 227 if (pcbddc->dbg_flag) { 228 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 229 } 230 if (sub_schurs->is_hermitian) { 231 PetscInt j; 232 for (j=0;j<subset_size;j++) { 233 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 234 } 235 for (j=0;j<subset_size;j++) { 236 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 237 } 238 } else { 239 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 240 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 241 } 242 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 243 #if defined(PETSC_USE_COMPLEX) 244 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 245 #else 246 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 247 #endif 248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 249 B_neigs += B_neigs2; 250 } 251 if (B_ierr) { 252 if (B_ierr < 0 ) { 253 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 254 } else if (B_ierr <= B_N) { 255 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 256 } else { 257 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 258 } 259 } 260 if (pcbddc->dbg_flag) { 261 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs); 262 for (j=0;j<B_neigs;j++) { 263 if (eigs[j] == 0.0) { 264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n"); 265 } else { 266 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]); 267 } 268 } 269 } 270 } else { 271 /* TODO */ 272 } 273 274 maxneigs = PetscMax(B_neigs,maxneigs); 275 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 276 if (B_neigs) { 277 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 278 279 if (pcbddc->dbg_flag > 1) { 280 PetscInt ii; 281 for (ii=0;ii<B_neigs;ii++) { 282 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N); 283 for (j=0;j<B_N;j++) { 284 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 285 } 286 } 287 } 288 #if 0 289 for (j=0;j<B_neigs;j++) { 290 PetscBLASInt Blas_N,Blas_one = 1.0; 291 PetscScalar norm; 292 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 293 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 294 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 295 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 296 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 297 } else { 298 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 299 } 300 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 301 } 302 #endif 303 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 304 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 305 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 306 cum++; 307 } 308 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 309 } 310 /* shift for next computation */ 311 cumarray += subset_size*subset_size; 312 } 313 if (pcbddc->dbg_flag) { 314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 315 } 316 317 if (mss) { 318 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 319 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 320 /* destroy matrices (junk) */ 321 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 322 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 323 } 324 if (allocated_S_St) { 325 ierr = PetscFree2(S,St);CHKERRQ(ierr); 326 } 327 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 328 #if defined(PETSC_USE_COMPLEX) 329 ierr = PetscFree(rwork);CHKERRQ(ierr); 330 #endif 331 if (pcbddc->dbg_flag) { 332 PetscInt maxneigs_r; 333 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 334 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 335 } 336 PetscFunctionReturn(0); 337 } 338 339 #undef __FUNCT__ 340 #define __FUNCT__ "PCBDDCSetUpSolvers" 341 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 342 { 343 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 344 PetscScalar *coarse_submat_vals; 345 PetscErrorCode ierr; 346 347 PetscFunctionBegin; 348 /* Setup local scatters R_to_B and (optionally) R_to_D */ 349 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 350 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 351 352 /* Setup local neumann solver ksp_R */ 353 /* PCBDDCSetUpLocalScatters should be called first! */ 354 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 355 356 /* Change global null space passed in by the user if change of basis has been requested */ 357 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 358 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 359 } 360 361 /* 362 Setup local correction and local part of coarse basis. 363 Gives back the dense local part of the coarse matrix in column major ordering 364 */ 365 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 366 367 /* Compute total number of coarse nodes and setup coarse solver */ 368 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 369 370 /* free */ 371 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 #undef __FUNCT__ 376 #define __FUNCT__ "PCBDDCResetCustomization" 377 PetscErrorCode PCBDDCResetCustomization(PC pc) 378 { 379 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 380 PetscErrorCode ierr; 381 382 PetscFunctionBegin; 383 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 384 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 385 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 386 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 387 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 388 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 389 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 390 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 391 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 392 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 393 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 #undef __FUNCT__ 398 #define __FUNCT__ "PCBDDCResetTopography" 399 PetscErrorCode PCBDDCResetTopography(PC pc) 400 { 401 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 406 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 407 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 408 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 409 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "PCBDDCResetSolvers" 415 PetscErrorCode PCBDDCResetSolvers(PC pc) 416 { 417 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 418 PetscScalar *array; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 423 if (pcbddc->coarse_phi_B) { 424 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 425 ierr = PetscFree(array);CHKERRQ(ierr); 426 } 427 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 428 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 429 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 430 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 431 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 432 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 433 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 434 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 435 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 436 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 437 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 438 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 439 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 440 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 441 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 442 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 443 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 445 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 446 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 447 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 448 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 449 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 450 PetscFunctionReturn(0); 451 } 452 453 #undef __FUNCT__ 454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 456 { 457 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 458 PC_IS *pcis = (PC_IS*)pc->data; 459 VecType impVecType; 460 PetscInt n_constraints,n_R,old_size; 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 if (!pcbddc->ConstraintMatrix) { 465 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 466 } 467 /* get sizes */ 468 n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices; 469 n_R = pcis->n-pcbddc->n_vertices; 470 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 471 /* local work vectors (try to avoid unneeded work)*/ 472 /* R nodes */ 473 old_size = -1; 474 if (pcbddc->vec1_R) { 475 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 476 } 477 if (n_R != old_size) { 478 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 479 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 480 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 481 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 482 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 483 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 484 } 485 /* local primal dofs */ 486 old_size = -1; 487 if (pcbddc->vec1_P) { 488 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 489 } 490 if (pcbddc->local_primal_size != old_size) { 491 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 492 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 493 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 494 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 495 } 496 /* local explicit constraints */ 497 old_size = -1; 498 if (pcbddc->vec1_C) { 499 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 500 } 501 if (n_constraints && n_constraints != old_size) { 502 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 503 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 504 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 505 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 506 } 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "PCBDDCSetUpCorrection" 512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 513 { 514 PetscErrorCode ierr; 515 /* pointers to pcis and pcbddc */ 516 PC_IS* pcis = (PC_IS*)pc->data; 517 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 518 /* submatrices of local problem */ 519 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 520 /* submatrices of local coarse problem */ 521 Mat S_VV,S_CV,S_VC,S_CC; 522 /* working matrices */ 523 Mat C_CR; 524 /* additional working stuff */ 525 PC pc_R; 526 Mat F; 527 PetscBool isLU,isCHOL,isILU; 528 529 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 530 PetscScalar *work; 531 PetscInt *idx_V_B; 532 PetscInt n,n_vertices,n_constraints; 533 PetscInt i,n_R,n_D,n_B; 534 PetscBool unsymmetric_check; 535 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 536 MatType impMatType; 537 /* some shortcuts to scalars */ 538 PetscScalar one=1.0,m_one=-1.0; 539 540 PetscFunctionBegin; 541 n_vertices = pcbddc->n_vertices; 542 n_constraints = pcbddc->local_primal_size-n_vertices; 543 /* Set Non-overlapping dimensions */ 544 n_B = pcis->n_B; 545 n_D = pcis->n - n_B; 546 n_R = pcis->n - n_vertices; 547 548 /* Set types for local objects needed by BDDC precondtioner */ 549 impMatType = MATSEQDENSE; 550 551 /* vertices in boundary numbering */ 552 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 553 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 554 if (i != n_vertices) { 555 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 556 } 557 558 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 559 ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 561 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 562 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 563 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 564 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 565 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 566 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 567 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 568 569 unsymmetric_check = PETSC_FALSE; 570 /* allocate workspace */ 571 n = 0; 572 if (n_constraints) { 573 n += n_R*n_constraints; 574 } 575 if (n_vertices) { 576 n = PetscMax(2*n_R*n_vertices,n); 577 n = PetscMax((n_R+n_B)*n_vertices,n); 578 } 579 if (!pcbddc->issym) { 580 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 581 unsymmetric_check = PETSC_TRUE; 582 } 583 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 584 585 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 586 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 587 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 588 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 589 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 590 if (isLU || isILU || isCHOL) { 591 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 592 } else { 593 F = NULL; 594 } 595 596 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 597 if (n_constraints) { 598 Mat M1,M2,M3; 599 Mat auxmat; 600 IS is_aux; 601 PetscScalar *array,*array2; 602 603 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 604 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 605 606 /* Extract constraints on R nodes: C_{CR} */ 607 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 608 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 609 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 610 611 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 612 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 613 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 614 for (i=0;i<n_constraints;i++) { 615 const PetscScalar *row_cmat_values; 616 const PetscInt *row_cmat_indices; 617 PetscInt size_of_constraint,j; 618 619 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 620 for (j=0;j<size_of_constraint;j++) { 621 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 622 } 623 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 624 } 625 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 626 if (F) { 627 Mat B; 628 629 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 630 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 631 ierr = MatDestroy(&B);CHKERRQ(ierr); 632 } else { 633 PetscScalar *marr; 634 635 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 636 for (i=0;i<n_constraints;i++) { 637 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 638 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 639 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 640 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 641 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 642 } 643 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 644 } 645 if (!pcbddc->switch_static) { 646 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 647 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 648 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 649 for (i=0;i<n_constraints;i++) { 650 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 651 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 652 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 653 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 654 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 655 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 656 } 657 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 658 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 659 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 660 } else { 661 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 662 pcbddc->local_auxmat2 = local_auxmat2_R; 663 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 664 } 665 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 666 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 667 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 668 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 669 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 670 if (isCHOL) { 671 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 672 } else { 673 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 674 } 675 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 676 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 677 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 678 ierr = MatDestroy(&M2);CHKERRQ(ierr); 679 ierr = MatDestroy(&M3);CHKERRQ(ierr); 680 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 681 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 682 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 683 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 684 ierr = MatDestroy(&M1);CHKERRQ(ierr); 685 } 686 /* Get submatrices from subdomain matrix */ 687 if (n_vertices) { 688 Mat newmat; 689 IS is_aux; 690 PetscInt ibs,mbs; 691 PetscBool issbaij; 692 693 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 694 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 695 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 696 if (ibs != mbs) { /* need to convert to SEQAIJ */ 697 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 698 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 699 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 700 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 701 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 702 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 703 } else { 704 /* this is safe */ 705 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 706 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 707 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 708 ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 709 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 710 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 711 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 712 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 713 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 714 } else { 715 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 716 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 717 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 718 } 719 /* need to convert to SeqAIJ for MatMatMult with dense matrix */ 720 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 721 } 722 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 723 } 724 725 /* Matrix of coarse basis functions (local) */ 726 if (pcbddc->coarse_phi_B) { 727 PetscInt on_B,on_primal,on_D=n_D; 728 if (pcbddc->coarse_phi_D) { 729 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 730 } 731 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 732 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 733 PetscScalar *marray; 734 735 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 736 ierr = PetscFree(marray);CHKERRQ(ierr); 737 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 738 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 739 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 740 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 741 } 742 } 743 744 if (!pcbddc->coarse_phi_B) { 745 PetscScalar *marray; 746 747 n = n_B*pcbddc->local_primal_size; 748 if (pcbddc->switch_static || pcbddc->dbg_flag) { 749 n += n_D*pcbddc->local_primal_size; 750 } 751 if (!pcbddc->issym) { 752 n *= 2; 753 } 754 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 755 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 756 n = n_B*pcbddc->local_primal_size; 757 if (pcbddc->switch_static || pcbddc->dbg_flag) { 758 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 759 n += n_D*pcbddc->local_primal_size; 760 } 761 if (!pcbddc->issym) { 762 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 763 if (pcbddc->switch_static || pcbddc->dbg_flag) { 764 n = n_B*pcbddc->local_primal_size; 765 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 766 } 767 } else { 768 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 769 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 770 if (pcbddc->switch_static || pcbddc->dbg_flag) { 771 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 772 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 773 } 774 } 775 } 776 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 777 /* vertices */ 778 if (n_vertices) { 779 780 if (n_R) { 781 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 782 PetscBLASInt B_N,B_one = 1; 783 PetscScalar *x,*y; 784 785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 786 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 787 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 788 if (F) { 789 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 790 } else { 791 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 792 for (i=0;i<n_vertices;i++) { 793 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 794 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 795 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 796 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 797 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 798 } 799 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 800 } 801 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 802 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 803 if (n_constraints) { 804 Mat B; 805 806 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 807 for (i=0;i<n_vertices;i++) { 808 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 809 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 810 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 811 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 812 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 813 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 814 } 815 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 816 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 817 ierr = MatDestroy(&B);CHKERRQ(ierr); 818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 819 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 820 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 821 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 822 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 823 ierr = MatDestroy(&B);CHKERRQ(ierr); 824 } 825 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 826 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 827 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 828 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 829 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 830 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 831 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 832 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 833 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 834 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 835 } else { 836 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 837 } 838 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 839 /* coarse basis functions */ 840 for (i=0;i<n_vertices;i++) { 841 PetscScalar *y; 842 843 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 844 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 845 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 846 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 847 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 848 y[n_B*i+idx_V_B[i]] = 1.0; 849 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 850 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 851 852 if (pcbddc->switch_static || pcbddc->dbg_flag) { 853 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 854 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 855 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 856 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 857 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 858 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 859 } 860 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 861 } 862 } 863 864 if (n_constraints) { 865 Mat B; 866 867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 868 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 869 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 870 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 871 if (n_vertices) { 872 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 873 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 874 } else { 875 Mat S_VCt; 876 877 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 878 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 879 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 880 } 881 } 882 ierr = MatDestroy(&B);CHKERRQ(ierr); 883 /* coarse basis functions */ 884 for (i=0;i<n_constraints;i++) { 885 PetscScalar *y; 886 887 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 888 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 889 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 890 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 893 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 894 if (pcbddc->switch_static || pcbddc->dbg_flag) { 895 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 896 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 897 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 898 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 899 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 900 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 901 } 902 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 903 } 904 } 905 if (n_constraints) { 906 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 907 } 908 909 /* compute other basis functions for non-symmetric problems */ 910 if (!pcbddc->issym) { 911 912 if (n_constraints) { 913 Mat S_CCT,B_C; 914 915 /* this is a lazy thing */ 916 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 917 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 918 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 919 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 920 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 921 if (n_vertices) { 922 Mat B_V,S_VCT; 923 924 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 925 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 926 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 927 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 928 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 929 } 930 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 931 } else { /* if there are no constraints, reset work */ 932 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 933 } 934 if (n_vertices && n_R) { 935 Mat A_VRT; 936 PetscScalar *marray; 937 PetscBLASInt B_N,B_one = 1; 938 939 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 940 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 941 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 942 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 943 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 944 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 945 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 946 } 947 948 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 949 for (i=0;i<pcbddc->local_primal_size;i++) { 950 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 951 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 952 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 953 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 954 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 955 } 956 } else { 957 for (i=0;i<pcbddc->local_primal_size;i++) { 958 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 959 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 960 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 961 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 962 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 963 } 964 } 965 /* coarse basis functions */ 966 for (i=0;i<pcbddc->local_primal_size;i++) { 967 PetscScalar *y; 968 969 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 970 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 971 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 972 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 if (i<n_vertices) { 975 y[n_B*i+idx_V_B[i]] = 1.0; 976 } 977 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 978 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 979 980 if (pcbddc->switch_static || pcbddc->dbg_flag) { 981 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 982 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 983 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 986 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 987 } 988 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 989 } 990 } 991 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 992 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 993 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 994 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 995 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 996 /* Checking coarse_sub_mat and coarse basis functios */ 997 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 998 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 999 if (pcbddc->dbg_flag) { 1000 Mat coarse_sub_mat; 1001 Mat AUXMAT,TM1,TM2,TM3,TM4; 1002 Mat coarse_phi_D,coarse_phi_B; 1003 Mat coarse_psi_D,coarse_psi_B; 1004 Mat A_II,A_BB,A_IB,A_BI; 1005 Mat C_B,CPHI; 1006 IS is_dummy; 1007 Vec mones; 1008 MatType checkmattype=MATSEQAIJ; 1009 PetscReal real_value; 1010 1011 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1012 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1013 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1014 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1015 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1016 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1017 if (unsymmetric_check) { 1018 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1019 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1020 } 1021 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1022 1023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1024 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr); 1025 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1026 if (unsymmetric_check) { 1027 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1028 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1029 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1030 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1031 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1032 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1033 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1034 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1035 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1036 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1037 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1038 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1039 } else { 1040 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1041 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1042 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1043 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1044 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1045 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1046 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1047 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1048 } 1049 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1050 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1051 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1052 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1053 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1054 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1055 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1056 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1057 1058 /* check constraints */ 1059 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1060 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1061 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1062 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1063 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1064 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1065 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1067 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1068 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1069 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1070 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1071 1072 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1073 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1074 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1075 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1076 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1077 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1078 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1079 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1080 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1081 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1082 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1083 if (unsymmetric_check) { 1084 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1085 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1086 } 1087 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1088 } 1089 /* free memory */ 1090 ierr = PetscFree(work);CHKERRQ(ierr); 1091 if (n_vertices) { 1092 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1093 } 1094 if (n_constraints) { 1095 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1096 } 1097 /* get back data */ 1098 *coarse_submat_vals_n = coarse_submat_vals; 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1104 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1105 { 1106 Mat *work_mat; 1107 IS isrow_s,iscol_s; 1108 PetscBool rsorted,csorted; 1109 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1110 PetscErrorCode ierr; 1111 1112 PetscFunctionBegin; 1113 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1114 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1115 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1116 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1117 1118 if (!rsorted) { 1119 const PetscInt *idxs; 1120 PetscInt *idxs_sorted,i; 1121 1122 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1123 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1124 for (i=0;i<rsize;i++) { 1125 idxs_perm_r[i] = i; 1126 } 1127 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1128 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1129 for (i=0;i<rsize;i++) { 1130 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1131 } 1132 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1133 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1134 } else { 1135 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1136 isrow_s = isrow; 1137 } 1138 1139 if (!csorted) { 1140 if (isrow == iscol) { 1141 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1142 iscol_s = isrow_s; 1143 } else { 1144 const PetscInt *idxs; 1145 PetscInt *idxs_sorted,i; 1146 1147 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1148 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1149 for (i=0;i<csize;i++) { 1150 idxs_perm_c[i] = i; 1151 } 1152 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1153 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1154 for (i=0;i<csize;i++) { 1155 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1156 } 1157 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1158 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1159 } 1160 } else { 1161 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1162 iscol_s = iscol; 1163 } 1164 1165 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1166 1167 if (!rsorted || !csorted) { 1168 Mat new_mat; 1169 IS is_perm_r,is_perm_c; 1170 1171 if (!rsorted) { 1172 PetscInt *idxs_r,i; 1173 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1174 for (i=0;i<rsize;i++) { 1175 idxs_r[idxs_perm_r[i]] = i; 1176 } 1177 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1178 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1179 } else { 1180 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1181 } 1182 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1183 1184 if (!csorted) { 1185 if (isrow_s == iscol_s) { 1186 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1187 is_perm_c = is_perm_r; 1188 } else { 1189 PetscInt *idxs_c,i; 1190 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1191 for (i=0;i<csize;i++) { 1192 idxs_c[idxs_perm_c[i]] = i; 1193 } 1194 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1195 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1196 } 1197 } else { 1198 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1199 } 1200 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1201 1202 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1203 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1204 work_mat[0] = new_mat; 1205 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1206 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1207 } 1208 1209 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1210 *B = work_mat[0]; 1211 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1212 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1213 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1219 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1220 { 1221 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1222 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1223 Mat new_mat; 1224 IS is_local,is_global; 1225 PetscInt local_size; 1226 PetscBool isseqaij; 1227 PetscErrorCode ierr; 1228 1229 PetscFunctionBegin; 1230 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1231 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1232 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1233 ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr); 1234 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1235 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1236 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1237 1238 /* check */ 1239 if (pcbddc->dbg_flag) { 1240 Vec x,x_change; 1241 PetscReal error; 1242 1243 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1244 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1245 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1246 ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1247 ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1248 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1249 ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1251 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1252 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1253 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1254 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1255 ierr = VecDestroy(&x);CHKERRQ(ierr); 1256 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1257 } 1258 1259 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1260 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1261 if (isseqaij) { 1262 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1263 } else { 1264 Mat work_mat; 1265 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1266 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1267 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1268 } 1269 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 1270 #if !defined(PETSC_USE_COMPLEX) 1271 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr); 1272 #endif 1273 /* 1274 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1275 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1276 */ 1277 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1283 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1284 { 1285 PC_IS* pcis = (PC_IS*)(pc->data); 1286 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1287 IS is_aux1,is_aux2; 1288 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1289 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1290 PetscInt vbs,bs; 1291 PetscBT bitmask; 1292 PetscErrorCode ierr; 1293 1294 PetscFunctionBegin; 1295 /* 1296 No need to setup local scatters if 1297 - primal space is unchanged 1298 AND 1299 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1300 AND 1301 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1302 */ 1303 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1304 PetscFunctionReturn(0); 1305 } 1306 /* destroy old objects */ 1307 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1308 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1309 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1310 /* Set Non-overlapping dimensions */ 1311 n_B = pcis->n_B; 1312 n_D = pcis->n - n_B; 1313 n_vertices = pcbddc->n_vertices; 1314 /* create auxiliary bitmask */ 1315 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1316 for (i=0;i<n_vertices;i++) { 1317 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1318 } 1319 1320 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1321 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1322 for (i=0, n_R=0; i<pcis->n; i++) { 1323 if (!PetscBTLookup(bitmask,i)) { 1324 idx_R_local[n_R] = i; 1325 n_R++; 1326 } 1327 } 1328 1329 /* Block code */ 1330 vbs = 1; 1331 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1332 if (bs>1 && !(n_vertices%bs)) { 1333 PetscBool is_blocked = PETSC_TRUE; 1334 PetscInt *vary; 1335 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1336 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1337 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1338 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1339 for (i=0; i<n_vertices/bs; i++) { 1340 if (vary[i]!=0 && vary[i]!=bs) { 1341 is_blocked = PETSC_FALSE; 1342 break; 1343 } 1344 } 1345 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1346 vbs = bs; 1347 for (i=0;i<n_R/vbs;i++) { 1348 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1349 } 1350 } 1351 ierr = PetscFree(vary);CHKERRQ(ierr); 1352 } 1353 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1354 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1355 1356 /* print some info if requested */ 1357 if (pcbddc->dbg_flag) { 1358 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1359 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1361 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1362 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1363 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 1364 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1365 } 1366 1367 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1368 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1369 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1370 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1371 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1372 for (i=0; i<n_D; i++) { 1373 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1374 } 1375 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1376 for (i=0, j=0; i<n_R; i++) { 1377 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1378 aux_array1[j++] = i; 1379 } 1380 } 1381 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1382 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1383 for (i=0, j=0; i<n_B; i++) { 1384 if (!PetscBTLookup(bitmask,is_indices[i])) { 1385 aux_array2[j++] = i; 1386 } 1387 } 1388 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1389 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1390 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1391 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1392 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1393 1394 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1395 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1396 for (i=0, j=0; i<n_R; i++) { 1397 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1398 aux_array1[j++] = i; 1399 } 1400 } 1401 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1402 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1403 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1404 } 1405 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1406 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1413 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1414 { 1415 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1416 PC_IS *pcis = (PC_IS*)pc->data; 1417 PC pc_temp; 1418 Mat A_RR; 1419 MatReuse reuse; 1420 PetscScalar m_one = -1.0; 1421 PetscReal value; 1422 PetscInt n_D,n_R,ibs,mbs; 1423 PetscBool use_exact,use_exact_reduced,issbaij; 1424 PetscErrorCode ierr; 1425 /* prefixes stuff */ 1426 char dir_prefix[256],neu_prefix[256],str_level[16]; 1427 size_t len; 1428 1429 PetscFunctionBegin; 1430 1431 /* compute prefixes */ 1432 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1433 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1434 if (!pcbddc->current_level) { 1435 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1436 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1437 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1438 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1439 } else { 1440 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1441 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1442 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1443 len -= 15; /* remove "pc_bddc_coarse_" */ 1444 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1445 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1446 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1447 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1448 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1449 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1450 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1451 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1452 } 1453 1454 /* DIRICHLET PROBLEM */ 1455 if (dirichlet) { 1456 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1457 if (pcbddc->issym) { 1458 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1459 } 1460 /* Matrix for Dirichlet problem is pcis->A_II */ 1461 n_D = pcis->n - pcis->n_B; 1462 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1463 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1464 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1465 /* default */ 1466 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1467 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1468 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1469 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1470 if (issbaij) { 1471 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1472 } else { 1473 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1474 } 1475 /* Allow user's customization */ 1476 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1477 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1478 } 1479 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1480 if (sub_schurs->interior_solver) { 1481 ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr); 1482 } 1483 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1484 if (!n_D) { 1485 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1486 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1487 } 1488 /* Set Up KSP for Dirichlet problem of BDDC */ 1489 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1490 /* set ksp_D into pcis data */ 1491 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1492 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1493 pcis->ksp_D = pcbddc->ksp_D; 1494 } 1495 1496 /* NEUMANN PROBLEM */ 1497 A_RR = 0; 1498 if (neumann) { 1499 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1500 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1501 if (pcbddc->ksp_R) { /* already created ksp */ 1502 PetscInt nn_R; 1503 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1504 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1505 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1506 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1507 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1508 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1509 reuse = MAT_INITIAL_MATRIX; 1510 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1511 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1512 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1513 reuse = MAT_INITIAL_MATRIX; 1514 } else { /* safe to reuse the matrix */ 1515 reuse = MAT_REUSE_MATRIX; 1516 } 1517 } 1518 /* last check */ 1519 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1520 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1521 reuse = MAT_INITIAL_MATRIX; 1522 } 1523 } else { /* first time, so we need to create the matrix */ 1524 reuse = MAT_INITIAL_MATRIX; 1525 } 1526 /* extract A_RR */ 1527 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1528 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1529 if (ibs != mbs) { 1530 Mat newmat; 1531 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1532 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1533 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1534 } else { 1535 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1536 } 1537 if (pcbddc->issym) { 1538 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1539 } 1540 if (!pcbddc->ksp_R) { /* create object if not present */ 1541 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1542 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1543 /* default */ 1544 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1545 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1546 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1547 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1548 if (issbaij) { 1549 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1550 } else { 1551 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1552 } 1553 /* Allow user's customization */ 1554 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1555 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1556 } 1557 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 1558 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1559 if (!n_R) { 1560 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1561 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1562 } 1563 /* Set Up KSP for Neumann problem of BDDC */ 1564 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1565 } 1566 1567 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1568 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1569 if (pcbddc->dbg_flag) { 1570 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1571 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1572 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1573 } 1574 if (dirichlet) { /* Dirichlet */ 1575 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1576 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1577 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1578 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1579 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1580 /* need to be adapted? */ 1581 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1582 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1583 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1584 /* print info */ 1585 if (pcbddc->dbg_flag) { 1586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1588 } 1589 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1590 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1591 } 1592 } 1593 if (neumann) { /* Neumann */ 1594 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1595 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1596 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1597 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1598 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1599 /* need to be adapted? */ 1600 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1601 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1602 /* print info */ 1603 if (pcbddc->dbg_flag) { 1604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1605 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1606 } 1607 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1608 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1609 } 1610 } 1611 } 1612 /* free Neumann problem's matrix */ 1613 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 1619 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 1620 { 1621 PetscErrorCode ierr; 1622 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1623 1624 PetscFunctionBegin; 1625 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 1626 if (!pcbddc->switch_static) { 1627 if (applytranspose && pcbddc->local_auxmat1) { 1628 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1629 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1630 } 1631 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1632 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1633 } else { 1634 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1635 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1636 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1637 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1638 if (applytranspose && pcbddc->local_auxmat1) { 1639 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 1640 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1641 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1642 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1643 } 1644 } 1645 if (applytranspose) { 1646 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1647 } else { 1648 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1649 } 1650 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 1651 if (!pcbddc->switch_static) { 1652 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 if (!applytranspose && pcbddc->local_auxmat1) { 1655 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1656 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 1657 } 1658 } else { 1659 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1660 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1663 if (!applytranspose && pcbddc->local_auxmat1) { 1664 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 1665 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 1666 } 1667 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1668 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1670 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1671 } 1672 PetscFunctionReturn(0); 1673 } 1674 1675 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1678 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 1679 { 1680 PetscErrorCode ierr; 1681 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1682 PC_IS* pcis = (PC_IS*) (pc->data); 1683 const PetscScalar zero = 0.0; 1684 1685 PetscFunctionBegin; 1686 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 1687 if (applytranspose) { 1688 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1689 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1690 } else { 1691 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1692 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1693 } 1694 /* start communications from local primal nodes to rhs of coarse solver */ 1695 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 1696 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1697 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1698 1699 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 1700 /* TODO remove null space when doing multilevel */ 1701 if (pcbddc->coarse_ksp) { 1702 Vec rhs,sol; 1703 1704 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 1705 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 1706 if (applytranspose) { 1707 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1708 } else { 1709 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 1710 } 1711 } 1712 1713 /* Local solution on R nodes */ 1714 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 1715 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 1716 } 1717 1718 /* communications from coarse sol to local primal nodes */ 1719 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1720 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1721 1722 /* Sum contributions from two levels */ 1723 if (applytranspose) { 1724 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1725 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1726 } else { 1727 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1728 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1729 } 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1735 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 1736 { 1737 PetscErrorCode ierr; 1738 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1739 PetscScalar *array; 1740 Vec from,to; 1741 1742 PetscFunctionBegin; 1743 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1744 from = pcbddc->coarse_vec; 1745 to = pcbddc->vec1_P; 1746 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1747 Vec tvec; 1748 1749 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1750 ierr = VecResetArray(tvec);CHKERRQ(ierr); 1751 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1752 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 1753 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 1754 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 1755 } 1756 } else { /* from local to global -> put data in coarse right hand side */ 1757 from = pcbddc->vec1_P; 1758 to = pcbddc->coarse_vec; 1759 } 1760 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1766 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 1767 { 1768 PetscErrorCode ierr; 1769 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1770 PetscScalar *array; 1771 Vec from,to; 1772 1773 PetscFunctionBegin; 1774 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 1775 from = pcbddc->coarse_vec; 1776 to = pcbddc->vec1_P; 1777 } else { /* from local to global -> put data in coarse right hand side */ 1778 from = pcbddc->vec1_P; 1779 to = pcbddc->coarse_vec; 1780 } 1781 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 1782 if (smode == SCATTER_FORWARD) { 1783 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 1784 Vec tvec; 1785 1786 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 1787 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 1788 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 1789 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 1790 } 1791 } else { 1792 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 1793 ierr = VecResetArray(from);CHKERRQ(ierr); 1794 } 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 /* uncomment for testing purposes */ 1800 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1803 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1804 { 1805 PetscErrorCode ierr; 1806 PC_IS* pcis = (PC_IS*)(pc->data); 1807 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1808 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1809 /* one and zero */ 1810 PetscScalar one=1.0,zero=0.0; 1811 /* space to store constraints and their local indices */ 1812 PetscScalar *constraints_data; 1813 PetscInt *constraints_idxs,*constraints_idxs_B; 1814 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 1815 PetscInt *constraints_n; 1816 /* iterators */ 1817 PetscInt i,j,k,total_counts,total_counts_cc,cum; 1818 /* BLAS integers */ 1819 PetscBLASInt lwork,lierr; 1820 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1821 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1822 /* reuse */ 1823 PetscInt olocal_primal_size,olocal_primal_size_cc; 1824 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 1825 /* change of basis */ 1826 PetscBool qr_needed; 1827 PetscBT change_basis,qr_needed_idx; 1828 /* auxiliary stuff */ 1829 PetscInt *nnz,*is_indices; 1830 PetscInt ncc; 1831 /* some quantities */ 1832 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1833 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 1834 1835 PetscFunctionBegin; 1836 /* Destroy Mat objects computed previously */ 1837 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1838 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1839 /* save info on constraints from previous setup (if any) */ 1840 olocal_primal_size = pcbddc->local_primal_size; 1841 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 1842 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 1843 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1844 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 1845 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1846 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1847 1848 /* print some info */ 1849 if (pcbddc->dbg_flag) { 1850 IS vertices; 1851 PetscInt nv,nedges,nfaces; 1852 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 1853 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 1854 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 1855 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1857 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 1858 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 1859 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 1860 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1861 } 1862 1863 if (!pcbddc->adaptive_selection) { 1864 IS ISForVertices,*ISForFaces,*ISForEdges; 1865 MatNullSpace nearnullsp; 1866 const Vec *nearnullvecs; 1867 Vec *localnearnullsp; 1868 PetscScalar *array; 1869 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 1870 PetscBool nnsp_has_cnst; 1871 /* LAPACK working arrays for SVD or POD */ 1872 PetscBool skip_lapack,boolforchange; 1873 PetscScalar *work; 1874 PetscReal *singular_vals; 1875 #if defined(PETSC_USE_COMPLEX) 1876 PetscReal *rwork; 1877 #endif 1878 #if defined(PETSC_MISSING_LAPACK_GESVD) 1879 PetscScalar *temp_basis,*correlation_mat; 1880 #else 1881 PetscBLASInt dummy_int=1; 1882 PetscScalar dummy_scalar=1.; 1883 #endif 1884 1885 /* Get index sets for faces, edges and vertices from graph */ 1886 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 1887 /* free unneeded index sets */ 1888 if (!pcbddc->use_vertices) { 1889 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1890 } 1891 if (!pcbddc->use_edges) { 1892 for (i=0;i<n_ISForEdges;i++) { 1893 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1894 } 1895 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1896 n_ISForEdges = 0; 1897 } 1898 if (!pcbddc->use_faces) { 1899 for (i=0;i<n_ISForFaces;i++) { 1900 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1901 } 1902 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1903 n_ISForFaces = 0; 1904 } 1905 1906 #if defined(PETSC_USE_DEBUG) 1907 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 1908 Also use_change_of_basis should be consistent among processors */ 1909 if (pcbddc->NullSpace) { 1910 PetscBool tbool[2],gbool[2]; 1911 1912 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 1913 pcbddc->use_change_of_basis = PETSC_TRUE; 1914 if (!ISForEdges) { 1915 pcbddc->use_change_on_faces = PETSC_TRUE; 1916 } 1917 } 1918 tbool[0] = pcbddc->use_change_of_basis; 1919 tbool[1] = pcbddc->use_change_on_faces; 1920 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1921 pcbddc->use_change_of_basis = gbool[0]; 1922 pcbddc->use_change_on_faces = gbool[1]; 1923 } 1924 #endif 1925 1926 /* check if near null space is attached to global mat */ 1927 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1928 if (nearnullsp) { 1929 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1930 /* remove any stored info */ 1931 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1932 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1933 /* store information for BDDC solver reuse */ 1934 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1935 pcbddc->onearnullspace = nearnullsp; 1936 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1937 for (i=0;i<nnsp_size;i++) { 1938 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1939 } 1940 } else { /* if near null space is not provided BDDC uses constants by default */ 1941 nnsp_size = 0; 1942 nnsp_has_cnst = PETSC_TRUE; 1943 } 1944 /* get max number of constraints on a single cc */ 1945 max_constraints = nnsp_size; 1946 if (nnsp_has_cnst) max_constraints++; 1947 1948 /* 1949 Evaluate maximum storage size needed by the procedure 1950 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 1951 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 1952 There can be multiple constraints per connected component 1953 */ 1954 n_vertices = 0; 1955 if (ISForVertices) { 1956 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1957 } 1958 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 1959 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 1960 1961 total_counts = n_ISForFaces+n_ISForEdges; 1962 total_counts *= max_constraints; 1963 total_counts += n_vertices; 1964 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1965 1966 total_counts = 0; 1967 max_size_of_constraint = 0; 1968 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1969 IS used_is; 1970 if (i<n_ISForEdges) { 1971 used_is = ISForEdges[i]; 1972 } else { 1973 used_is = ISForFaces[i-n_ISForEdges]; 1974 } 1975 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 1976 total_counts += j; 1977 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1978 } 1979 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 1980 1981 /* get local part of global near null space vectors */ 1982 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 1983 for (k=0;k<nnsp_size;k++) { 1984 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1985 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 } 1988 1989 /* whether or not to skip lapack calls */ 1990 skip_lapack = PETSC_TRUE; 1991 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 1992 1993 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1994 if (!skip_lapack) { 1995 PetscScalar temp_work; 1996 1997 #if defined(PETSC_MISSING_LAPACK_GESVD) 1998 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1999 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2000 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2001 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2002 #if defined(PETSC_USE_COMPLEX) 2003 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2004 #endif 2005 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2006 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2007 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2008 lwork = -1; 2009 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2010 #if !defined(PETSC_USE_COMPLEX) 2011 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2012 #else 2013 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2014 #endif 2015 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2016 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2017 #else /* on missing GESVD */ 2018 /* SVD */ 2019 PetscInt max_n,min_n; 2020 max_n = max_size_of_constraint; 2021 min_n = max_constraints; 2022 if (max_size_of_constraint < max_constraints) { 2023 min_n = max_size_of_constraint; 2024 max_n = max_constraints; 2025 } 2026 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2027 #if defined(PETSC_USE_COMPLEX) 2028 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2029 #endif 2030 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2031 lwork = -1; 2032 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2033 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2034 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2035 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2036 #if !defined(PETSC_USE_COMPLEX) 2037 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2038 #else 2039 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2040 #endif 2041 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2042 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2043 #endif /* on missing GESVD */ 2044 /* Allocate optimal workspace */ 2045 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2046 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2047 } 2048 /* Now we can loop on constraining sets */ 2049 total_counts = 0; 2050 constraints_idxs_ptr[0] = 0; 2051 constraints_data_ptr[0] = 0; 2052 /* vertices */ 2053 if (n_vertices) { 2054 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2055 if (nnsp_has_cnst) { /* it considers all possible vertices */ 2056 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2057 for (i=0;i<n_vertices;i++) { 2058 constraints_n[total_counts] = 1; 2059 constraints_data[total_counts] = 1.0; 2060 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2061 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2062 total_counts++; 2063 } 2064 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 2065 PetscBool used_vertex; 2066 for (i=0;i<n_vertices;i++) { 2067 used_vertex = PETSC_FALSE; 2068 k = 0; 2069 while (!used_vertex && k<nnsp_size) { 2070 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2071 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 2072 constraints_n[total_counts] = 1; 2073 constraints_idxs[total_counts] = is_indices[i]; 2074 constraints_data[total_counts] = 1.0; 2075 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2076 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2077 total_counts++; 2078 used_vertex = PETSC_TRUE; 2079 } 2080 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2081 k++; 2082 } 2083 } 2084 } 2085 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2086 n_vertices = total_counts; 2087 } 2088 2089 /* edges and faces */ 2090 total_counts_cc = total_counts; 2091 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2092 IS used_is; 2093 PetscBool idxs_copied = PETSC_FALSE; 2094 2095 if (ncc<n_ISForEdges) { 2096 used_is = ISForEdges[ncc]; 2097 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2098 } else { 2099 used_is = ISForFaces[ncc-n_ISForEdges]; 2100 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2101 } 2102 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2103 2104 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2105 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2106 /* change of basis should not be performed on local periodic nodes */ 2107 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2108 if (nnsp_has_cnst) { 2109 PetscScalar quad_value; 2110 2111 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2112 idxs_copied = PETSC_TRUE; 2113 2114 if (!pcbddc->use_nnsp_true) { 2115 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2116 } else { 2117 quad_value = 1.0; 2118 } 2119 for (j=0;j<size_of_constraint;j++) { 2120 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2121 } 2122 temp_constraints++; 2123 total_counts++; 2124 } 2125 for (k=0;k<nnsp_size;k++) { 2126 PetscReal real_value; 2127 PetscScalar *ptr_to_data; 2128 2129 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2130 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2131 for (j=0;j<size_of_constraint;j++) { 2132 ptr_to_data[j] = array[is_indices[j]]; 2133 } 2134 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2135 /* check if array is null on the connected component */ 2136 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2137 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2138 if (real_value > 0.0) { /* keep indices and values */ 2139 temp_constraints++; 2140 total_counts++; 2141 if (!idxs_copied) { 2142 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2143 idxs_copied = PETSC_TRUE; 2144 } 2145 } 2146 } 2147 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2148 valid_constraints = temp_constraints; 2149 if (!pcbddc->use_nnsp_true && temp_constraints) { 2150 if (temp_constraints == 1) { /* just normalize the constraint */ 2151 PetscScalar norm,*ptr_to_data; 2152 2153 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2154 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2155 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2156 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2157 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2158 } else { /* perform SVD */ 2159 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2160 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2161 2162 #if defined(PETSC_MISSING_LAPACK_GESVD) 2163 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2164 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2165 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2166 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2167 from that computed using LAPACKgesvd 2168 -> This is due to a different computation of eigenvectors in LAPACKheev 2169 -> The quality of the POD-computed basis will be the same */ 2170 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2171 /* Store upper triangular part of correlation matrix */ 2172 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2173 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2174 for (j=0;j<temp_constraints;j++) { 2175 for (k=0;k<j+1;k++) { 2176 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2177 } 2178 } 2179 /* compute eigenvalues and eigenvectors of correlation matrix */ 2180 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2181 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2182 #if !defined(PETSC_USE_COMPLEX) 2183 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2184 #else 2185 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2186 #endif 2187 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2188 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2189 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2190 j = 0; 2191 while (j < temp_constraints && singular_vals[j] < tol) j++; 2192 total_counts = total_counts-j; 2193 valid_constraints = temp_constraints-j; 2194 /* scale and copy POD basis into used quadrature memory */ 2195 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2196 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2197 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2198 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2199 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2200 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2201 if (j<temp_constraints) { 2202 PetscInt ii; 2203 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2204 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2205 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2206 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2207 for (k=0;k<temp_constraints-j;k++) { 2208 for (ii=0;ii<size_of_constraint;ii++) { 2209 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2210 } 2211 } 2212 } 2213 #else /* on missing GESVD */ 2214 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2215 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2216 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2217 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2218 #if !defined(PETSC_USE_COMPLEX) 2219 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2220 #else 2221 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2222 #endif 2223 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2224 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2225 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2226 k = temp_constraints; 2227 if (k > size_of_constraint) k = size_of_constraint; 2228 j = 0; 2229 while (j < k && singular_vals[k-j-1] < tol) j++; 2230 valid_constraints = k-j; 2231 total_counts = total_counts-temp_constraints+valid_constraints; 2232 #endif /* on missing GESVD */ 2233 } 2234 } 2235 /* update pointers information */ 2236 if (valid_constraints) { 2237 constraints_n[total_counts_cc] = valid_constraints; 2238 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2239 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2240 /* set change_of_basis flag */ 2241 if (boolforchange) { 2242 PetscBTSet(change_basis,total_counts_cc); 2243 } 2244 total_counts_cc++; 2245 } 2246 } 2247 /* free workspace */ 2248 if (!skip_lapack) { 2249 ierr = PetscFree(work);CHKERRQ(ierr); 2250 #if defined(PETSC_USE_COMPLEX) 2251 ierr = PetscFree(rwork);CHKERRQ(ierr); 2252 #endif 2253 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2254 #if defined(PETSC_MISSING_LAPACK_GESVD) 2255 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2256 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2257 #endif 2258 } 2259 for (k=0;k<nnsp_size;k++) { 2260 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2261 } 2262 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2263 /* free index sets of faces, edges and vertices */ 2264 for (i=0;i<n_ISForFaces;i++) { 2265 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2266 } 2267 if (n_ISForFaces) { 2268 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2269 } 2270 for (i=0;i<n_ISForEdges;i++) { 2271 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2272 } 2273 if (n_ISForEdges) { 2274 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2275 } 2276 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2277 } else { 2278 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2279 2280 total_counts = 0; 2281 n_vertices = 0; 2282 if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { 2283 ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr); 2284 } 2285 max_constraints = 0; 2286 total_counts_cc = 0; 2287 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2288 total_counts += pcbddc->adaptive_constraints_n[i]; 2289 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2290 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2291 } 2292 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2293 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2294 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2295 constraints_data = pcbddc->adaptive_constraints_data; 2296 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2297 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2298 total_counts_cc = 0; 2299 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2300 if (pcbddc->adaptive_constraints_n[i]) { 2301 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2302 } 2303 } 2304 #if 0 2305 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2306 for (i=0;i<total_counts_cc;i++) { 2307 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2308 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2309 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2310 printf(" %d",constraints_idxs[j]); 2311 } 2312 printf("\n"); 2313 printf("number of cc: %d\n",constraints_n[i]); 2314 } 2315 for (i=0;i<n_vertices;i++) { 2316 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2317 } 2318 for (i=0;i<sub_schurs->n_subs;i++) { 2319 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2320 } 2321 #endif 2322 2323 max_size_of_constraint = 0; 2324 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2325 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2326 /* Change of basis */ 2327 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2328 if (pcbddc->use_change_of_basis) { 2329 for (i=0;i<sub_schurs->n_subs;i++) { 2330 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2331 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2332 } 2333 } 2334 } 2335 } 2336 pcbddc->local_primal_size = total_counts; 2337 ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2338 2339 /* map constraints_idxs in boundary numbering */ 2340 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2341 if (i != constraints_idxs_ptr[total_counts_cc]) { 2342 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2343 } 2344 2345 /* Create constraint matrix */ 2346 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2347 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2348 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2349 2350 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2351 /* determine if a QR strategy is needed for change of basis */ 2352 qr_needed = PETSC_FALSE; 2353 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2354 total_primal_vertices=0; 2355 pcbddc->local_primal_size_cc = 0; 2356 for (i=0;i<total_counts_cc;i++) { 2357 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2358 if (size_of_constraint == 1) { 2359 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2360 pcbddc->local_primal_size_cc += 1; 2361 } else if (PetscBTLookup(change_basis,i)) { 2362 for (k=0;k<constraints_n[i];k++) { 2363 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2364 } 2365 pcbddc->local_primal_size_cc += constraints_n[i]; 2366 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2367 PetscBTSet(qr_needed_idx,i); 2368 qr_needed = PETSC_TRUE; 2369 } 2370 } else { 2371 pcbddc->local_primal_size_cc += 1; 2372 } 2373 } 2374 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2375 pcbddc->n_vertices = total_primal_vertices; 2376 /* permute indices in order to have a sorted set of vertices */ 2377 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2378 2379 ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2380 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2381 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2382 2383 /* nonzero structure of constraint matrix */ 2384 /* and get reference dof for local constraints */ 2385 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2386 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2387 2388 j = total_primal_vertices; 2389 total_counts = total_primal_vertices; 2390 cum = total_primal_vertices; 2391 for (i=n_vertices;i<total_counts_cc;i++) { 2392 if (!PetscBTLookup(change_basis,i)) { 2393 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2394 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2395 cum++; 2396 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2397 for (k=0;k<constraints_n[i];k++) { 2398 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2399 nnz[j+k] = size_of_constraint; 2400 } 2401 j += constraints_n[i]; 2402 } 2403 } 2404 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2405 ierr = PetscFree(nnz);CHKERRQ(ierr); 2406 2407 /* set values in constraint matrix */ 2408 for (i=0;i<total_primal_vertices;i++) { 2409 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2410 } 2411 total_counts = total_primal_vertices; 2412 for (i=n_vertices;i<total_counts_cc;i++) { 2413 if (!PetscBTLookup(change_basis,i)) { 2414 PetscInt *cols; 2415 2416 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2417 cols = constraints_idxs+constraints_idxs_ptr[i]; 2418 for (k=0;k<constraints_n[i];k++) { 2419 PetscInt row = total_counts+k; 2420 PetscScalar *vals; 2421 2422 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2423 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2424 } 2425 total_counts += constraints_n[i]; 2426 } 2427 } 2428 /* assembling */ 2429 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2430 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2431 2432 /* 2433 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2434 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2435 */ 2436 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2437 if (pcbddc->use_change_of_basis) { 2438 /* dual and primal dofs on a single cc */ 2439 PetscInt dual_dofs,primal_dofs; 2440 /* working stuff for GEQRF */ 2441 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2442 PetscBLASInt lqr_work; 2443 /* working stuff for UNGQR */ 2444 PetscScalar *gqr_work,lgqr_work_t; 2445 PetscBLASInt lgqr_work; 2446 /* working stuff for TRTRS */ 2447 PetscScalar *trs_rhs; 2448 PetscBLASInt Blas_NRHS; 2449 /* pointers for values insertion into change of basis matrix */ 2450 PetscInt *start_rows,*start_cols; 2451 PetscScalar *start_vals; 2452 /* working stuff for values insertion */ 2453 PetscBT is_primal; 2454 PetscInt *aux_primal_numbering_B; 2455 /* matrix sizes */ 2456 PetscInt global_size,local_size; 2457 /* temporary change of basis */ 2458 Mat localChangeOfBasisMatrix; 2459 /* extra space for debugging */ 2460 PetscScalar *dbg_work; 2461 2462 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2463 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2464 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2465 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2466 /* nonzeros for local mat */ 2467 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2468 for (i=0;i<pcis->n;i++) nnz[i]=1; 2469 for (i=n_vertices;i<total_counts_cc;i++) { 2470 if (PetscBTLookup(change_basis,i)) { 2471 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2472 if (PetscBTLookup(qr_needed_idx,i)) { 2473 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2474 } else { 2475 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2476 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2477 } 2478 } 2479 } 2480 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2481 ierr = PetscFree(nnz);CHKERRQ(ierr); 2482 /* Set initial identity in the matrix */ 2483 for (i=0;i<pcis->n;i++) { 2484 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2485 } 2486 2487 if (pcbddc->dbg_flag) { 2488 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2489 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2490 } 2491 2492 2493 /* Now we loop on the constraints which need a change of basis */ 2494 /* 2495 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2496 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2497 2498 Basic blocks of change of basis matrix T computed by 2499 2500 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2501 2502 | 1 0 ... 0 s_1/S | 2503 | 0 1 ... 0 s_2/S | 2504 | ... | 2505 | 0 ... 1 s_{n-1}/S | 2506 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2507 2508 with S = \sum_{i=1}^n s_i^2 2509 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2510 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2511 2512 - QR decomposition of constraints otherwise 2513 */ 2514 if (qr_needed) { 2515 /* space to store Q */ 2516 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2517 /* first we issue queries for optimal work */ 2518 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2519 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2520 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2521 lqr_work = -1; 2522 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2523 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 2524 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 2525 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 2526 lgqr_work = -1; 2527 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2528 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 2529 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 2530 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2531 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 2532 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 2533 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 2534 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 2535 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 2536 /* array to store scaling factors for reflectors */ 2537 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 2538 /* array to store rhs and solution of triangular solver */ 2539 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 2540 /* allocating workspace for check */ 2541 if (pcbddc->dbg_flag) { 2542 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 2543 } 2544 } 2545 /* array to store whether a node is primal or not */ 2546 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 2547 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 2548 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 2549 if (i != total_primal_vertices) { 2550 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 2551 } 2552 for (i=0;i<total_primal_vertices;i++) { 2553 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 2554 } 2555 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 2556 2557 /* loop on constraints and see whether or not they need a change of basis and compute it */ 2558 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 2559 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 2560 if (PetscBTLookup(change_basis,total_counts)) { 2561 /* get constraint info */ 2562 primal_dofs = constraints_n[total_counts]; 2563 dual_dofs = size_of_constraint-primal_dofs; 2564 2565 if (pcbddc->dbg_flag) { 2566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 2567 } 2568 2569 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 2570 2571 /* copy quadrature constraints for change of basis check */ 2572 if (pcbddc->dbg_flag) { 2573 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2574 } 2575 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 2576 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2577 2578 /* compute QR decomposition of constraints */ 2579 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2580 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2581 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2582 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2583 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 2584 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 2585 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2586 2587 /* explictly compute R^-T */ 2588 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 2589 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 2590 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2591 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 2592 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2593 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2594 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2595 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 2596 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 2597 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2598 2599 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 2600 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2601 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2602 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2603 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2604 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2605 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 2606 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 2607 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2608 2609 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 2610 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 2611 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 2612 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2613 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 2614 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 2615 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2616 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 2617 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2618 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2619 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 2620 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2621 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 2622 2623 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 2624 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 2625 /* insert cols for primal dofs */ 2626 for (j=0;j<primal_dofs;j++) { 2627 start_vals = &qr_basis[j*size_of_constraint]; 2628 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2629 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2630 } 2631 /* insert cols for dual dofs */ 2632 for (j=0,k=0;j<dual_dofs;k++) { 2633 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 2634 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 2635 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2636 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2637 j++; 2638 } 2639 } 2640 2641 /* check change of basis */ 2642 if (pcbddc->dbg_flag) { 2643 PetscInt ii,jj; 2644 PetscBool valid_qr=PETSC_TRUE; 2645 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2646 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2647 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2648 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2649 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2650 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2651 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2652 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 2653 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2654 for (jj=0;jj<size_of_constraint;jj++) { 2655 for (ii=0;ii<primal_dofs;ii++) { 2656 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2657 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2658 } 2659 } 2660 if (!valid_qr) { 2661 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 2662 for (jj=0;jj<size_of_constraint;jj++) { 2663 for (ii=0;ii<primal_dofs;ii++) { 2664 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2665 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2666 } 2667 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2668 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2669 } 2670 } 2671 } 2672 } else { 2673 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 2674 } 2675 } 2676 } else { /* simple transformation block */ 2677 PetscInt row,col; 2678 PetscScalar val,norm; 2679 2680 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2681 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 2682 for (j=0;j<size_of_constraint;j++) { 2683 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 2684 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 2685 if (!PetscBTLookup(is_primal,row_B)) { 2686 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 2687 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2688 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 2689 } else { 2690 for (k=0;k<size_of_constraint;k++) { 2691 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 2692 if (row != col) { 2693 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 2694 } else { 2695 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 2696 } 2697 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2698 } 2699 } 2700 } 2701 if (pcbddc->dbg_flag) { 2702 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 2703 } 2704 } 2705 } else { 2706 if (pcbddc->dbg_flag) { 2707 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 2708 } 2709 } 2710 } 2711 2712 /* free workspace */ 2713 if (qr_needed) { 2714 if (pcbddc->dbg_flag) { 2715 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 2716 } 2717 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2718 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2719 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2720 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2721 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2722 } 2723 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2724 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2725 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2726 2727 /* assembling of global change of variable */ 2728 { 2729 Mat tmat; 2730 PetscInt bs; 2731 2732 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2733 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2734 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 2735 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 2736 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2737 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2738 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 2739 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 2740 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2741 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 2742 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2743 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2744 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 2745 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 2746 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2747 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2748 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 2749 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 2750 } 2751 /* check */ 2752 if (pcbddc->dbg_flag) { 2753 PetscReal error; 2754 Vec x,x_change; 2755 2756 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 2757 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 2758 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2759 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 2760 ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2761 ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2762 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 2763 ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2764 ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2765 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 2766 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2767 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2768 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2769 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 2770 ierr = VecDestroy(&x);CHKERRQ(ierr); 2771 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2772 } 2773 2774 /* adapt sub_schurs computed (if any) */ 2775 if (pcbddc->use_deluxe_scaling) { 2776 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 2777 if (sub_schurs->S_Ej_all) { 2778 Mat S_new,tmat; 2779 IS is_all_N; 2780 2781 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 2782 ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr); 2783 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 2784 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2785 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 2786 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2787 sub_schurs->S_Ej_all = S_new; 2788 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2789 if (sub_schurs->sum_S_Ej_all) { 2790 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 2791 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 2792 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 2793 sub_schurs->sum_S_Ej_all = S_new; 2794 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 2795 } 2796 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 2797 } 2798 } 2799 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 2800 } else if (pcbddc->user_ChangeOfBasisMatrix) { 2801 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2802 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 2803 } 2804 2805 /* set up change of basis context */ 2806 if (pcbddc->ChangeOfBasisMatrix) { 2807 PCBDDCChange_ctx change_ctx; 2808 2809 if (!pcbddc->new_global_mat) { 2810 PetscInt global_size,local_size; 2811 2812 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 2813 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 2814 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 2815 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 2816 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 2817 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 2818 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 2819 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 2820 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 2821 } else { 2822 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 2823 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 2824 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 2825 } 2826 if (!pcbddc->user_ChangeOfBasisMatrix) { 2827 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2828 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 2829 } else { 2830 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 2831 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 2832 } 2833 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 2834 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 2835 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2836 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 } 2838 2839 /* check if a new primal space has been introduced */ 2840 pcbddc->new_primal_space_local = PETSC_TRUE; 2841 if (olocal_primal_size == pcbddc->local_primal_size) { 2842 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2843 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2844 if (!pcbddc->new_primal_space_local) { 2845 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2846 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 2847 } 2848 } 2849 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 2850 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2851 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2852 2853 /* flush dbg viewer */ 2854 if (pcbddc->dbg_flag) { 2855 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2856 } 2857 2858 /* free workspace */ 2859 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2860 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2861 if (!pcbddc->adaptive_selection) { 2862 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 2863 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 2864 } else { 2865 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 2866 pcbddc->adaptive_constraints_idxs_ptr, 2867 pcbddc->adaptive_constraints_data_ptr, 2868 pcbddc->adaptive_constraints_idxs, 2869 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 2870 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 2871 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 2872 } 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2878 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2879 { 2880 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2881 PC_IS *pcis = (PC_IS*)pc->data; 2882 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2883 PetscInt ierr,i,vertex_size,N; 2884 PetscViewer viewer=pcbddc->dbg_viewer; 2885 2886 PetscFunctionBegin; 2887 /* Reset previously computed graph */ 2888 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2889 /* Init local Graph struct */ 2890 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 2891 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr); 2892 2893 /* Check validity of the csr graph passed in by the user */ 2894 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2895 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2896 } 2897 2898 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2899 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2900 PetscInt *xadj,*adjncy; 2901 PetscInt nvtxs; 2902 PetscBool flg_row=PETSC_FALSE; 2903 2904 if (pcbddc->use_local_adj) { 2905 2906 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2907 if (flg_row) { 2908 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2909 pcbddc->computed_rowadj = PETSC_TRUE; 2910 } 2911 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2912 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 2913 IS is_dummy; 2914 ISLocalToGlobalMapping l2gmap_dummy; 2915 PetscInt j,sum; 2916 PetscInt *cxadj,*cadjncy; 2917 const PetscInt *idxs; 2918 PCBDDCGraph graph; 2919 PetscBT is_on_boundary; 2920 2921 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 2922 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2923 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2924 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2925 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 2926 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2927 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2928 if (flg_row) { 2929 graph->xadj = xadj; 2930 graph->adjncy = adjncy; 2931 } 2932 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2933 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2934 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2935 2936 if (pcbddc->dbg_flag) { 2937 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 2938 for (i=0;i<graph->ncc;i++) { 2939 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 2940 } 2941 } 2942 2943 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 2944 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2945 for (i=0;i<pcis->n_B;i++) { 2946 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 2947 } 2948 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2949 2950 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 2951 sum = 0; 2952 for (i=0;i<graph->ncc;i++) { 2953 PetscInt sizecc = 0; 2954 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2955 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2956 sizecc++; 2957 } 2958 } 2959 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2960 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2961 cxadj[graph->queue[j]] = sizecc; 2962 } 2963 } 2964 sum += sizecc*sizecc; 2965 } 2966 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 2967 sum = 0; 2968 for (i=0;i<pcis->n;i++) { 2969 PetscInt temp = cxadj[i]; 2970 cxadj[i] = sum; 2971 sum += temp; 2972 } 2973 cxadj[pcis->n] = sum; 2974 for (i=0;i<graph->ncc;i++) { 2975 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 2976 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 2977 PetscInt k,sizecc = 0; 2978 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 2979 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 2980 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 2981 sizecc++; 2982 } 2983 } 2984 } 2985 } 2986 } 2987 if (sum) { 2988 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 2989 } else { 2990 ierr = PetscFree(cxadj);CHKERRQ(ierr); 2991 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 2992 } 2993 graph->xadj = 0; 2994 graph->adjncy = 0; 2995 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2996 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 2997 } 2998 } 2999 if (pcbddc->dbg_flag) { 3000 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3001 } 3002 3003 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3004 vertex_size = 1; 3005 if (pcbddc->user_provided_isfordofs) { 3006 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3007 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3008 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3009 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3010 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3011 } 3012 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3013 pcbddc->n_ISForDofs = 0; 3014 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3015 } 3016 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3017 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3018 } else { 3019 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3020 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3021 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3022 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3023 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3024 } 3025 } 3026 } 3027 3028 /* Setup of Graph */ 3029 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3030 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3031 } 3032 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3033 ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3034 } 3035 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3036 3037 /* Graph's connected components analysis */ 3038 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3039 3040 /* print some info to stdout */ 3041 if (pcbddc->dbg_flag) { 3042 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3043 } 3044 3045 /* mark topography has done */ 3046 pcbddc->recompute_topography = PETSC_FALSE; 3047 PetscFunctionReturn(0); 3048 } 3049 3050 #undef __FUNCT__ 3051 #define __FUNCT__ "PCBDDCSubsetNumbering" 3052 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 3053 { 3054 Vec local_vec,global_vec; 3055 IS seqis,paris; 3056 VecScatter scatter_ctx; 3057 PetscScalar *array; 3058 PetscInt *temp_global_dofs; 3059 PetscScalar globalsum; 3060 PetscInt i,j,s; 3061 PetscInt nlocals,first_index,old_index,max_local,max_global; 3062 PetscMPIInt rank_prec_comm,size_prec_comm; 3063 PetscInt *dof_sizes,*dof_displs; 3064 PetscBool first_found; 3065 PetscErrorCode ierr; 3066 3067 PetscFunctionBegin; 3068 /* mpi buffers */ 3069 ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr); 3070 ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr); 3071 j = ( !rank_prec_comm ? size_prec_comm : 0); 3072 ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr); 3073 /* get maximum size of subset */ 3074 ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr); 3075 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 3076 max_local = 0; 3077 for (i=0;i<n_local_dofs;i++) { 3078 if (max_local < temp_global_dofs[i] ) { 3079 max_local = temp_global_dofs[i]; 3080 } 3081 } 3082 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 3083 max_global++; 3084 max_local = 0; 3085 for (i=0;i<n_local_dofs;i++) { 3086 if (max_local < local_dofs[i] ) { 3087 max_local = local_dofs[i]; 3088 } 3089 } 3090 max_local++; 3091 /* allocate workspace */ 3092 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 3093 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 3094 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 3095 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 3096 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 3097 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 3098 /* create scatter */ 3099 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 3100 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 3101 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 3102 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 3103 ierr = ISDestroy(&paris);CHKERRQ(ierr); 3104 /* init array */ 3105 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 3106 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3107 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 3108 if (local_dofs_mult) { 3109 for (i=0;i<n_local_dofs;i++) { 3110 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 3111 } 3112 } else { 3113 for (i=0;i<n_local_dofs;i++) { 3114 array[local_dofs[i]]=1.0; 3115 } 3116 } 3117 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 3118 /* scatter into global vec and get total number of global dofs */ 3119 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3120 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3121 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 3122 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 3123 /* Fill global_vec with cumulative function for global numbering */ 3124 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 3125 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 3126 nlocals = 0; 3127 first_index = -1; 3128 first_found = PETSC_FALSE; 3129 for (i=0;i<s;i++) { 3130 if (!first_found && PetscRealPart(array[i]) > 0.1) { 3131 first_found = PETSC_TRUE; 3132 first_index = i; 3133 } 3134 nlocals += (PetscInt)PetscRealPart(array[i]); 3135 } 3136 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3137 if (!rank_prec_comm) { 3138 dof_displs[0]=0; 3139 for (i=1;i<size_prec_comm;i++) { 3140 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 3141 } 3142 } 3143 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 3144 if (first_found) { 3145 array[first_index] += (PetscScalar)nlocals; 3146 old_index = first_index; 3147 for (i=first_index+1;i<s;i++) { 3148 if (PetscRealPart(array[i]) > 0.1) { 3149 array[i] += array[old_index]; 3150 old_index = i; 3151 } 3152 } 3153 } 3154 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 3155 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 3156 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3157 ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3158 /* get global ordering of local dofs */ 3159 ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3160 if (local_dofs_mult) { 3161 for (i=0;i<n_local_dofs;i++) { 3162 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 3163 } 3164 } else { 3165 for (i=0;i<n_local_dofs;i++) { 3166 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 3167 } 3168 } 3169 ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 3170 /* free workspace */ 3171 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 3172 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 3173 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 3174 ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr); 3175 /* return pointer to global ordering of local dofs */ 3176 *global_numbering_subset = temp_global_dofs; 3177 PetscFunctionReturn(0); 3178 } 3179 3180 #undef __FUNCT__ 3181 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3182 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3183 { 3184 PetscInt i,j; 3185 PetscScalar *alphas; 3186 PetscErrorCode ierr; 3187 3188 PetscFunctionBegin; 3189 /* this implements stabilized Gram-Schmidt */ 3190 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3191 for (i=0;i<n;i++) { 3192 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3193 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3194 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3195 } 3196 ierr = PetscFree(alphas);CHKERRQ(ierr); 3197 PetscFunctionReturn(0); 3198 } 3199 3200 #undef __FUNCT__ 3201 #define __FUNCT__ "MatISGetSubassemblingPattern" 3202 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends) 3203 { 3204 Mat subdomain_adj; 3205 IS new_ranks,ranks_send_to; 3206 MatPartitioning partitioner; 3207 Mat_IS *matis; 3208 PetscInt n_neighs,*neighs,*n_shared,**shared; 3209 PetscInt prank; 3210 PetscMPIInt size,rank,color; 3211 PetscInt *xadj,*adjncy,*oldranks; 3212 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 3213 PetscInt i,local_size,threshold=0; 3214 PetscErrorCode ierr; 3215 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3216 PetscSubcomm subcomm; 3217 3218 PetscFunctionBegin; 3219 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3220 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3221 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3222 3223 /* Get info on mapping */ 3224 matis = (Mat_IS*)(mat->data); 3225 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 3226 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3227 3228 /* build local CSR graph of subdomains' connectivity */ 3229 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3230 xadj[0] = 0; 3231 xadj[1] = PetscMax(n_neighs-1,0); 3232 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3233 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3234 3235 if (threshold) { 3236 PetscInt xadj_count = 0; 3237 for (i=1;i<n_neighs;i++) { 3238 if (n_shared[i] > threshold) { 3239 adjncy[xadj_count] = neighs[i]; 3240 adjncy_wgt[xadj_count] = n_shared[i]; 3241 xadj_count++; 3242 } 3243 } 3244 xadj[1] = xadj_count; 3245 } else { 3246 if (xadj[1]) { 3247 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3248 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3249 } 3250 } 3251 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3252 if (use_square) { 3253 for (i=0;i<xadj[1];i++) { 3254 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3255 } 3256 } 3257 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3258 3259 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3260 3261 /* 3262 Restrict work on active processes only. 3263 */ 3264 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3265 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3266 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3267 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3268 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3269 if (color) { 3270 ierr = PetscFree(xadj);CHKERRQ(ierr); 3271 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3272 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3273 } else { 3274 PetscInt coarsening_ratio; 3275 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3276 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3277 prank = rank; 3278 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3279 /* 3280 for (i=0;i<size;i++) { 3281 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3282 } 3283 */ 3284 for (i=0;i<xadj[1];i++) { 3285 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3286 } 3287 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3288 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3289 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3290 3291 /* Partition */ 3292 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3293 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3294 if (use_vwgt) { 3295 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3296 v_wgt[0] = local_size; 3297 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3298 } 3299 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3300 coarsening_ratio = size/n_subdomains; 3301 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3302 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3303 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3304 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3305 3306 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3307 if (contiguous) { 3308 ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */ 3309 } else { 3310 ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */ 3311 } 3312 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3313 /* clean up */ 3314 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3315 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3316 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3317 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3318 } 3319 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3320 3321 /* assemble parallel IS for sends */ 3322 i = 1; 3323 if (color) i=0; 3324 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3325 3326 /* get back IS */ 3327 *is_sends = ranks_send_to; 3328 PetscFunctionReturn(0); 3329 } 3330 3331 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3332 3333 #undef __FUNCT__ 3334 #define __FUNCT__ "MatISSubassemble" 3335 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3336 { 3337 Mat local_mat; 3338 Mat_IS *matis; 3339 IS is_sends_internal; 3340 PetscInt rows,cols,new_local_rows; 3341 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3342 PetscBool ismatis,isdense,newisdense,destroy_mat; 3343 ISLocalToGlobalMapping l2gmap; 3344 PetscInt* l2gmap_indices; 3345 const PetscInt* is_indices; 3346 MatType new_local_type; 3347 /* buffers */ 3348 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3349 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3350 PetscInt *recv_buffer_idxs_local; 3351 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3352 /* MPI */ 3353 MPI_Comm comm,comm_n; 3354 PetscSubcomm subcomm; 3355 PetscMPIInt n_sends,n_recvs,commsize; 3356 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3357 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3358 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3359 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3360 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3361 PetscErrorCode ierr; 3362 3363 PetscFunctionBegin; 3364 /* TODO: add missing checks */ 3365 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3366 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3367 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3368 PetscValidLogicalCollectiveInt(mat,nis,7); 3369 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3370 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3371 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3372 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3373 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3374 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3375 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3376 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3377 PetscInt mrows,mcols,mnrows,mncols; 3378 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3379 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3380 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3381 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3382 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3383 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3384 } 3385 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3386 PetscValidLogicalCollectiveInt(mat,bs,0); 3387 /* prepare IS for sending if not provided */ 3388 if (!is_sends) { 3389 PetscBool pcontig = PETSC_TRUE; 3390 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3391 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr); 3392 } else { 3393 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3394 is_sends_internal = is_sends; 3395 } 3396 3397 /* get pointer of MATIS data */ 3398 matis = (Mat_IS*)mat->data; 3399 3400 /* get comm */ 3401 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3402 3403 /* compute number of sends */ 3404 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3405 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3406 3407 /* compute number of receives */ 3408 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3409 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3410 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3411 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3412 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3413 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3414 ierr = PetscFree(iflags);CHKERRQ(ierr); 3415 3416 /* restrict comm if requested */ 3417 subcomm = 0; 3418 destroy_mat = PETSC_FALSE; 3419 if (restrict_comm) { 3420 PetscMPIInt color,subcommsize; 3421 3422 color = 0; 3423 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */ 3424 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3425 subcommsize = commsize - subcommsize; 3426 /* check if reuse has been requested */ 3427 if (reuse == MAT_REUSE_MATRIX) { 3428 if (*mat_n) { 3429 PetscMPIInt subcommsize2; 3430 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3431 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3432 comm_n = PetscObjectComm((PetscObject)*mat_n); 3433 } else { 3434 comm_n = PETSC_COMM_SELF; 3435 } 3436 } else { /* MAT_INITIAL_MATRIX */ 3437 PetscMPIInt rank; 3438 3439 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3440 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 3441 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 3442 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3443 comm_n = PetscSubcommChild(subcomm); 3444 } 3445 /* flag to destroy *mat_n if not significative */ 3446 if (color) destroy_mat = PETSC_TRUE; 3447 } else { 3448 comm_n = comm; 3449 } 3450 3451 /* prepare send/receive buffers */ 3452 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 3453 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 3454 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 3455 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 3456 if (nis) { 3457 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 3458 } 3459 3460 /* Get data from local matrices */ 3461 if (!isdense) { 3462 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 3463 /* TODO: See below some guidelines on how to prepare the local buffers */ 3464 /* 3465 send_buffer_vals should contain the raw values of the local matrix 3466 send_buffer_idxs should contain: 3467 - MatType_PRIVATE type 3468 - PetscInt size_of_l2gmap 3469 - PetscInt global_row_indices[size_of_l2gmap] 3470 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 3471 */ 3472 } else { 3473 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3474 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 3475 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 3476 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 3477 send_buffer_idxs[1] = i; 3478 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3479 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 3480 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 3481 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 3482 for (i=0;i<n_sends;i++) { 3483 ilengths_vals[is_indices[i]] = len*len; 3484 ilengths_idxs[is_indices[i]] = len+2; 3485 } 3486 } 3487 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 3488 /* additional is (if any) */ 3489 if (nis) { 3490 PetscMPIInt psum; 3491 PetscInt j; 3492 for (j=0,psum=0;j<nis;j++) { 3493 PetscInt plen; 3494 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3495 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 3496 psum += len+1; /* indices + lenght */ 3497 } 3498 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 3499 for (j=0,psum=0;j<nis;j++) { 3500 PetscInt plen; 3501 const PetscInt *is_array_idxs; 3502 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 3503 send_buffer_idxs_is[psum] = plen; 3504 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3505 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 3506 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 3507 psum += plen+1; /* indices + lenght */ 3508 } 3509 for (i=0;i<n_sends;i++) { 3510 ilengths_idxs_is[is_indices[i]] = psum; 3511 } 3512 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 3513 } 3514 3515 buf_size_idxs = 0; 3516 buf_size_vals = 0; 3517 buf_size_idxs_is = 0; 3518 for (i=0;i<n_recvs;i++) { 3519 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3520 buf_size_vals += (PetscInt)olengths_vals[i]; 3521 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 3522 } 3523 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 3524 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 3525 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 3526 3527 /* get new tags for clean communications */ 3528 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 3529 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 3530 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 3531 3532 /* allocate for requests */ 3533 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 3534 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 3536 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 3537 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 3538 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 3539 3540 /* communications */ 3541 ptr_idxs = recv_buffer_idxs; 3542 ptr_vals = recv_buffer_vals; 3543 ptr_idxs_is = recv_buffer_idxs_is; 3544 for (i=0;i<n_recvs;i++) { 3545 source_dest = onodes[i]; 3546 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 3547 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 3548 ptr_idxs += olengths_idxs[i]; 3549 ptr_vals += olengths_vals[i]; 3550 if (nis) { 3551 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 3552 ptr_idxs_is += olengths_idxs_is[i]; 3553 } 3554 } 3555 for (i=0;i<n_sends;i++) { 3556 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 3557 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 3558 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 3559 if (nis) { 3560 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 3561 } 3562 } 3563 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3564 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 3565 3566 /* assemble new l2g map */ 3567 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3568 ptr_idxs = recv_buffer_idxs; 3569 new_local_rows = 0; 3570 for (i=0;i<n_recvs;i++) { 3571 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3572 ptr_idxs += olengths_idxs[i]; 3573 } 3574 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 3575 ptr_idxs = recv_buffer_idxs; 3576 new_local_rows = 0; 3577 for (i=0;i<n_recvs;i++) { 3578 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 3579 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 3580 ptr_idxs += olengths_idxs[i]; 3581 } 3582 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 3583 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3584 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3585 3586 /* infer new local matrix type from received local matrices type */ 3587 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3588 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3589 if (n_recvs) { 3590 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3591 ptr_idxs = recv_buffer_idxs; 3592 for (i=0;i<n_recvs;i++) { 3593 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3594 new_local_type_private = MATAIJ_PRIVATE; 3595 break; 3596 } 3597 ptr_idxs += olengths_idxs[i]; 3598 } 3599 switch (new_local_type_private) { 3600 case MATDENSE_PRIVATE: 3601 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 3602 new_local_type = MATSEQAIJ; 3603 bs = 1; 3604 } else { /* if I receive only 1 dense matrix */ 3605 new_local_type = MATSEQDENSE; 3606 bs = 1; 3607 } 3608 break; 3609 case MATAIJ_PRIVATE: 3610 new_local_type = MATSEQAIJ; 3611 bs = 1; 3612 break; 3613 case MATBAIJ_PRIVATE: 3614 new_local_type = MATSEQBAIJ; 3615 break; 3616 case MATSBAIJ_PRIVATE: 3617 new_local_type = MATSEQSBAIJ; 3618 break; 3619 default: 3620 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 3621 break; 3622 } 3623 } else { /* by default, new_local_type is seqdense */ 3624 new_local_type = MATSEQDENSE; 3625 bs = 1; 3626 } 3627 3628 /* create MATIS object if needed */ 3629 if (reuse == MAT_INITIAL_MATRIX) { 3630 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3631 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr); 3632 } else { 3633 /* it also destroys the local matrices */ 3634 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 3635 } 3636 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 3637 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3638 3639 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3640 3641 /* Global to local map of received indices */ 3642 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 3643 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 3644 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3645 3646 /* restore attributes -> type of incoming data and its size */ 3647 buf_size_idxs = 0; 3648 for (i=0;i<n_recvs;i++) { 3649 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 3650 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 3651 buf_size_idxs += (PetscInt)olengths_idxs[i]; 3652 } 3653 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3654 3655 /* set preallocation */ 3656 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 3657 if (!newisdense) { 3658 PetscInt *new_local_nnz=0; 3659 3660 ptr_vals = recv_buffer_vals; 3661 ptr_idxs = recv_buffer_idxs_local; 3662 if (n_recvs) { 3663 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 3664 } 3665 for (i=0;i<n_recvs;i++) { 3666 PetscInt j; 3667 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 3668 for (j=0;j<*(ptr_idxs+1);j++) { 3669 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 3670 } 3671 } else { 3672 /* TODO */ 3673 } 3674 ptr_idxs += olengths_idxs[i]; 3675 } 3676 if (new_local_nnz) { 3677 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 3678 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 3679 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 3680 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3681 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 3682 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 3683 } else { 3684 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3685 } 3686 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 3687 } else { 3688 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 3689 } 3690 3691 /* set values */ 3692 ptr_vals = recv_buffer_vals; 3693 ptr_idxs = recv_buffer_idxs_local; 3694 for (i=0;i<n_recvs;i++) { 3695 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3696 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3697 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3698 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3699 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3700 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3701 } else { 3702 /* TODO */ 3703 } 3704 ptr_idxs += olengths_idxs[i]; 3705 ptr_vals += olengths_vals[i]; 3706 } 3707 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3708 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3709 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3710 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3711 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3712 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 3713 3714 #if 0 3715 if (!restrict_comm) { /* check */ 3716 Vec lvec,rvec; 3717 PetscReal infty_error; 3718 3719 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3720 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3721 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3722 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3723 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 3724 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3725 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3726 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3727 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3728 } 3729 #endif 3730 3731 /* assemble new additional is (if any) */ 3732 if (nis) { 3733 PetscInt **temp_idxs,*count_is,j,psum; 3734 3735 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3736 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 3737 ptr_idxs = recv_buffer_idxs_is; 3738 psum = 0; 3739 for (i=0;i<n_recvs;i++) { 3740 for (j=0;j<nis;j++) { 3741 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3742 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 3743 psum += plen; 3744 ptr_idxs += plen+1; /* shift pointer to received data */ 3745 } 3746 } 3747 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 3748 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 3749 for (i=1;i<nis;i++) { 3750 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 3751 } 3752 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 3753 ptr_idxs = recv_buffer_idxs_is; 3754 for (i=0;i<n_recvs;i++) { 3755 for (j=0;j<nis;j++) { 3756 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 3757 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 3758 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 3759 ptr_idxs += plen+1; /* shift pointer to received data */ 3760 } 3761 } 3762 for (i=0;i<nis;i++) { 3763 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3764 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 3765 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3766 } 3767 ierr = PetscFree(count_is);CHKERRQ(ierr); 3768 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 3769 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 3770 } 3771 /* free workspace */ 3772 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 3773 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3774 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3775 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3776 if (isdense) { 3777 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3778 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3779 } else { 3780 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3781 } 3782 if (nis) { 3783 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3784 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 3785 } 3786 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3787 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3788 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 3789 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3790 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3791 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 3792 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3793 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3794 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3795 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3796 ierr = PetscFree(onodes);CHKERRQ(ierr); 3797 if (nis) { 3798 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 3799 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 3800 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 3801 } 3802 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3803 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 3804 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 3805 for (i=0;i<nis;i++) { 3806 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 3807 } 3808 } 3809 PetscFunctionReturn(0); 3810 } 3811 3812 /* temporary hack into ksp private data structure */ 3813 #include <petsc/private/kspimpl.h> 3814 3815 #undef __FUNCT__ 3816 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3817 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3818 { 3819 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3820 PC_IS *pcis = (PC_IS*)pc->data; 3821 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3822 MatNullSpace CoarseNullSpace=NULL; 3823 ISLocalToGlobalMapping coarse_islg; 3824 IS coarse_is,*isarray; 3825 PetscInt i,im_active=-1,active_procs=-1; 3826 PetscInt nis,nisdofs,nisneu; 3827 PC pc_temp; 3828 PCType coarse_pc_type; 3829 KSPType coarse_ksp_type; 3830 PetscBool multilevel_requested,multilevel_allowed; 3831 PetscBool isredundant,isbddc,isnn,coarse_reuse; 3832 Mat t_coarse_mat_is; 3833 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 3834 PetscMPIInt all_procs; 3835 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 3836 PetscBool compute_vecs = PETSC_FALSE; 3837 PetscScalar *array; 3838 PetscErrorCode ierr; 3839 3840 PetscFunctionBegin; 3841 /* Assign global numbering to coarse dofs */ 3842 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 3843 PetscInt ocoarse_size; 3844 compute_vecs = PETSC_TRUE; 3845 ocoarse_size = pcbddc->coarse_size; 3846 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3847 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3848 /* see if we can avoid some work */ 3849 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3850 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3851 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3852 coarse_reuse = PETSC_FALSE; 3853 } else { /* we can safely reuse already computed coarse matrix */ 3854 coarse_reuse = PETSC_TRUE; 3855 } 3856 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3857 coarse_reuse = PETSC_FALSE; 3858 } 3859 /* reset any subassembling information */ 3860 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3861 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 3862 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 3863 coarse_reuse = PETSC_TRUE; 3864 } 3865 3866 /* count "active" (i.e. with positive local size) and "void" processes */ 3867 im_active = !!(pcis->n); 3868 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3869 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 3870 void_procs = all_procs-active_procs; 3871 csin_type_simple = PETSC_TRUE; 3872 redist = PETSC_FALSE; 3873 if (pcbddc->current_level && void_procs) { 3874 csin_ml = PETSC_TRUE; 3875 ncoarse_ml = void_procs; 3876 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 3877 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 3878 csin_ds = PETSC_TRUE; 3879 ncoarse_ds = pcbddc->redistribute_coarse; 3880 redist = PETSC_TRUE; 3881 } else { 3882 csin_ds = PETSC_TRUE; 3883 ncoarse_ds = active_procs; 3884 redist = PETSC_TRUE; 3885 } 3886 } else { 3887 csin_ml = PETSC_FALSE; 3888 ncoarse_ml = all_procs; 3889 if (void_procs) { 3890 csin_ds = PETSC_TRUE; 3891 ncoarse_ds = void_procs; 3892 csin_type_simple = PETSC_FALSE; 3893 } else { 3894 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 3895 csin_ds = PETSC_TRUE; 3896 ncoarse_ds = pcbddc->redistribute_coarse; 3897 redist = PETSC_TRUE; 3898 } else { 3899 csin_ds = PETSC_FALSE; 3900 ncoarse_ds = all_procs; 3901 } 3902 } 3903 } 3904 3905 /* 3906 test if we can go multilevel: three conditions must be satisfied: 3907 - we have not exceeded the number of levels requested 3908 - we can actually subassemble the active processes 3909 - we can find a suitable number of MPI processes where we can place the subassembled problem 3910 */ 3911 multilevel_allowed = PETSC_FALSE; 3912 multilevel_requested = PETSC_FALSE; 3913 if (pcbddc->current_level < pcbddc->max_levels) { 3914 multilevel_requested = PETSC_TRUE; 3915 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 3916 multilevel_allowed = PETSC_FALSE; 3917 } else { 3918 multilevel_allowed = PETSC_TRUE; 3919 } 3920 } 3921 /* determine number of process partecipating to coarse solver */ 3922 if (multilevel_allowed) { 3923 ncoarse = ncoarse_ml; 3924 csin = csin_ml; 3925 redist = PETSC_FALSE; 3926 } else { 3927 ncoarse = ncoarse_ds; 3928 csin = csin_ds; 3929 } 3930 3931 /* creates temporary l2gmap and IS for coarse indexes */ 3932 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3933 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3934 3935 /* creates temporary MATIS object for coarse matrix */ 3936 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 3937 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3938 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 3939 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 3940 #if 0 3941 { 3942 PetscViewer viewer; 3943 char filename[256]; 3944 sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank); 3945 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 3946 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3947 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 3948 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 3949 } 3950 #endif 3951 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr); 3952 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3953 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3954 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3955 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3956 3957 /* compute dofs splitting and neumann boundaries for coarse dofs */ 3958 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 3959 PetscInt *tidxs,*tidxs2,nout,tsize,i; 3960 const PetscInt *idxs; 3961 ISLocalToGlobalMapping tmap; 3962 3963 /* create map between primal indices (in local representative ordering) and local primal numbering */ 3964 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 3965 /* allocate space for temporary storage */ 3966 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 3967 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 3968 /* allocate for IS array */ 3969 nisdofs = pcbddc->n_ISForDofsLocal; 3970 nisneu = !!pcbddc->NeumannBoundariesLocal; 3971 nis = nisdofs + nisneu; 3972 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 3973 /* dofs splitting */ 3974 for (i=0;i<nisdofs;i++) { 3975 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 3976 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 3977 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3978 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3979 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 3980 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3981 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 3982 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 3983 } 3984 /* neumann boundaries */ 3985 if (pcbddc->NeumannBoundariesLocal) { 3986 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 3987 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 3988 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3989 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 3990 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 3991 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 3992 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 3993 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 3994 } 3995 /* free memory */ 3996 ierr = PetscFree(tidxs);CHKERRQ(ierr); 3997 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 3998 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 3999 } else { 4000 nis = 0; 4001 nisdofs = 0; 4002 nisneu = 0; 4003 isarray = NULL; 4004 } 4005 /* destroy no longer needed map */ 4006 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4007 4008 /* restrict on coarse candidates (if needed) */ 4009 coarse_mat_is = NULL; 4010 if (csin) { 4011 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4012 if (redist) { 4013 PetscMPIInt rank; 4014 PetscInt spc,n_spc_p1,dest[1],destsize; 4015 4016 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4017 spc = active_procs/ncoarse; 4018 n_spc_p1 = active_procs%ncoarse; 4019 if (im_active) { 4020 destsize = 1; 4021 if (rank > n_spc_p1*(spc+1)-1) { 4022 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4023 } else { 4024 dest[0] = rank/(spc+1); 4025 } 4026 } else { 4027 destsize = 0; 4028 } 4029 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4030 } else if (csin_type_simple) { 4031 PetscMPIInt rank; 4032 PetscInt issize,isidx; 4033 4034 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4035 if (im_active) { 4036 issize = 1; 4037 isidx = (PetscInt)rank; 4038 } else { 4039 issize = 0; 4040 isidx = -1; 4041 } 4042 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4043 } else { /* get a suitable subassembling pattern from MATIS code */ 4044 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4045 } 4046 4047 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4048 if (!redist || ncoarse <= void_procs) { 4049 PetscInt ncoarse_cand,tissize,*nisindices; 4050 PetscInt *coarse_candidates; 4051 const PetscInt* tisindices; 4052 4053 /* get coarse candidates' ranks in pc communicator */ 4054 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4055 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4056 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4057 if (!coarse_candidates[i]) { 4058 coarse_candidates[ncoarse_cand++]=i; 4059 } 4060 } 4061 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4062 4063 4064 if (pcbddc->dbg_flag) { 4065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4067 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4068 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4069 for (i=0;i<ncoarse_cand;i++) { 4070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4071 } 4072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4073 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4074 } 4075 /* shift the pattern on coarse candidates */ 4076 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4077 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4078 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4079 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4080 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4081 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4082 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4083 } 4084 if (pcbddc->dbg_flag) { 4085 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4087 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4088 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4089 } 4090 } 4091 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4092 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4093 } else { 4094 if (pcbddc->dbg_flag) { 4095 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4098 } 4099 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4100 coarse_mat_is = t_coarse_mat_is; 4101 } 4102 4103 /* create local to global scatters for coarse problem */ 4104 if (compute_vecs) { 4105 PetscInt lrows; 4106 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4107 if (coarse_mat_is) { 4108 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4109 } else { 4110 lrows = 0; 4111 } 4112 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4113 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4114 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4115 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4116 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4117 } 4118 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4119 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4120 4121 /* set defaults for coarse KSP and PC */ 4122 if (multilevel_allowed) { 4123 coarse_ksp_type = KSPRICHARDSON; 4124 coarse_pc_type = PCBDDC; 4125 } else { 4126 coarse_ksp_type = KSPPREONLY; 4127 coarse_pc_type = PCREDUNDANT; 4128 } 4129 4130 /* print some info if requested */ 4131 if (pcbddc->dbg_flag) { 4132 if (!multilevel_allowed) { 4133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4134 if (multilevel_requested) { 4135 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4136 } else if (pcbddc->max_levels) { 4137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4138 } 4139 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4140 } 4141 } 4142 4143 /* create the coarse KSP object only once with defaults */ 4144 if (coarse_mat_is) { 4145 MatReuse coarse_mat_reuse; 4146 PetscViewer dbg_viewer = NULL; 4147 if (pcbddc->dbg_flag) { 4148 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4149 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4150 } 4151 if (!pcbddc->coarse_ksp) { 4152 char prefix[256],str_level[16]; 4153 size_t len; 4154 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4155 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4156 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4157 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4158 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4159 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4160 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4161 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4162 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4163 /* prefix */ 4164 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4165 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4166 if (!pcbddc->current_level) { 4167 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4168 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4169 } else { 4170 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4171 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4172 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4173 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4174 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4175 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4176 } 4177 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4178 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4179 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4180 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4181 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4182 /* allow user customization */ 4183 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4184 } 4185 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4186 if (nisdofs) { 4187 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4188 for (i=0;i<nisdofs;i++) { 4189 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4190 } 4191 } 4192 if (nisneu) { 4193 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4194 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4195 } 4196 4197 /* get some info after set from options */ 4198 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4199 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4200 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4201 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4202 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4203 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4204 isbddc = PETSC_FALSE; 4205 } 4206 if (isredundant) { 4207 KSP inner_ksp; 4208 PC inner_pc; 4209 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4210 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4211 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4212 } 4213 4214 /* assemble coarse matrix */ 4215 if (coarse_reuse) { 4216 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4217 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4218 coarse_mat_reuse = MAT_REUSE_MATRIX; 4219 } else { 4220 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4221 } 4222 if (isbddc || isnn) { 4223 if (pcbddc->coarsening_ratio > 1) { 4224 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4225 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4226 if (pcbddc->dbg_flag) { 4227 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4228 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4229 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4230 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4231 } 4232 } 4233 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4234 } else { 4235 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4236 coarse_mat = coarse_mat_is; 4237 } 4238 } else { 4239 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4240 } 4241 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4242 4243 /* propagate symmetry info to coarse matrix */ 4244 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr); 4245 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4246 4247 /* set operators */ 4248 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4249 if (pcbddc->dbg_flag) { 4250 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4251 } 4252 } else { /* processes non partecipating to coarse solver (if any) */ 4253 coarse_mat = 0; 4254 } 4255 ierr = PetscFree(isarray);CHKERRQ(ierr); 4256 #if 0 4257 { 4258 PetscViewer viewer; 4259 char filename[256]; 4260 sprintf(filename,"coarse_mat.m"); 4261 ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr); 4262 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4263 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4264 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4265 } 4266 #endif 4267 4268 /* Compute coarse null space (special handling by BDDC only) */ 4269 if (pcbddc->NullSpace) { 4270 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4271 } 4272 4273 if (pcbddc->coarse_ksp) { 4274 Vec crhs,csol; 4275 PetscBool ispreonly; 4276 if (CoarseNullSpace) { 4277 if (isbddc) { 4278 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4279 } else { 4280 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 4281 } 4282 } 4283 /* setup coarse ksp */ 4284 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4285 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4286 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4287 /* hack */ 4288 if (!csol) { 4289 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4290 } 4291 if (!crhs) { 4292 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4293 } 4294 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4295 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4296 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4297 KSP check_ksp; 4298 KSPType check_ksp_type; 4299 PC check_pc; 4300 Vec check_vec,coarse_vec; 4301 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4302 PetscInt its; 4303 PetscBool compute_eigs; 4304 PetscReal *eigs_r,*eigs_c; 4305 PetscInt neigs; 4306 const char *prefix; 4307 4308 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4309 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4310 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4311 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4312 if (ispreonly) { 4313 check_ksp_type = KSPPREONLY; 4314 compute_eigs = PETSC_FALSE; 4315 } else { 4316 check_ksp_type = KSPGMRES; 4317 compute_eigs = PETSC_TRUE; 4318 } 4319 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4320 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4321 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4322 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4323 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4324 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4325 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4326 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4327 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4328 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4329 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4330 /* create random vec */ 4331 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4332 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4333 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4334 if (CoarseNullSpace) { 4335 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4336 } 4337 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4338 /* solve coarse problem */ 4339 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4340 if (CoarseNullSpace) { 4341 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4342 } 4343 /* set eigenvalue estimation if preonly has not been requested */ 4344 if (compute_eigs) { 4345 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4346 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4347 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4348 lambda_max = eigs_r[neigs-1]; 4349 lambda_min = eigs_r[0]; 4350 if (pcbddc->use_coarse_estimates) { 4351 if (lambda_max>lambda_min) { 4352 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4353 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4354 } 4355 } 4356 } 4357 4358 /* check coarse problem residual error */ 4359 if (pcbddc->dbg_flag) { 4360 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4361 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4362 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4363 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4364 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4365 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4366 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4367 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4368 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4369 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4370 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4371 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4372 if (compute_eigs) { 4373 PetscReal lambda_max_s,lambda_min_s; 4374 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4375 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4376 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4377 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4378 for (i=0;i<neigs;i++) { 4379 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4380 } 4381 } 4382 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4383 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4384 } 4385 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4386 if (compute_eigs) { 4387 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4388 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4389 } 4390 } 4391 } 4392 /* print additional info */ 4393 if (pcbddc->dbg_flag) { 4394 /* waits until all processes reaches this point */ 4395 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4397 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4398 } 4399 4400 /* free memory */ 4401 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4402 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4408 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4409 { 4410 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4411 PC_IS* pcis = (PC_IS*)pc->data; 4412 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4413 PetscInt i,local_size,coarse_size=0; 4414 PetscInt *local_primal_indices=NULL; 4415 PetscInt *t_local_primal_indices=NULL; 4416 PetscErrorCode ierr; 4417 4418 PetscFunctionBegin; 4419 /* Compute global number of coarse dofs */ 4420 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 4421 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 4422 } 4423 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr); 4424 ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr); 4425 local_size = 0; 4426 for (i=0;i<pcbddc->local_primal_size_cc;i++) { 4427 PetscInt j; 4428 for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j; 4429 } 4430 ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr); 4431 if (local_size != pcbddc->local_primal_size) { 4432 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 4433 } 4434 4435 /* check numbering */ 4436 if (pcbddc->dbg_flag) { 4437 PetscScalar coarsesum,*array; 4438 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 4439 4440 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4441 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4442 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 4443 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 4444 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 4445 for (i=0;i<pcbddc->local_primal_size;i++) { 4446 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 4447 } 4448 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 4449 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 4450 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4451 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4452 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4453 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4454 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4455 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4456 for (i=0;i<pcis->n;i++) { 4457 if (array[i] == 1.0) { 4458 set_error = PETSC_TRUE; 4459 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 4460 } 4461 } 4462 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4463 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4464 for (i=0;i<pcis->n;i++) { 4465 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 4466 } 4467 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 4468 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 4469 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4470 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4471 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 4472 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 4473 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 4474 PetscInt *gidxs; 4475 4476 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 4477 ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 4478 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 4479 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4480 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 4481 for (i=0;i<pcbddc->local_primal_size;i++) { 4482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 4483 } 4484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4485 ierr = PetscFree(gidxs);CHKERRQ(ierr); 4486 } 4487 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4488 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 4489 } 4490 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 4491 /* get back data */ 4492 *coarse_size_n = coarse_size; 4493 *local_primal_indices_n = local_primal_indices; 4494 PetscFunctionReturn(0); 4495 } 4496 4497 #undef __FUNCT__ 4498 #define __FUNCT__ "PCBDDCGlobalToLocal" 4499 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 4500 { 4501 IS localis_t; 4502 PetscInt i,lsize,*idxs,n; 4503 PetscScalar *vals; 4504 PetscErrorCode ierr; 4505 4506 PetscFunctionBegin; 4507 /* get indices in local ordering exploiting local to global map */ 4508 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 4509 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 4510 for (i=0;i<lsize;i++) vals[i] = 1.0; 4511 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4512 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 4513 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 4514 if (idxs) { /* multilevel guard */ 4515 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 4516 } 4517 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 4518 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 4519 ierr = PetscFree(vals);CHKERRQ(ierr); 4520 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 4521 /* now compute set in local ordering */ 4522 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4523 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4524 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4525 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 4526 for (i=0,lsize=0;i<n;i++) { 4527 if (PetscRealPart(vals[i]) > 0.5) { 4528 lsize++; 4529 } 4530 } 4531 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 4532 for (i=0,lsize=0;i<n;i++) { 4533 if (PetscRealPart(vals[i]) > 0.5) { 4534 idxs[lsize++] = i; 4535 } 4536 } 4537 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 4538 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 4539 *localis = localis_t; 4540 PetscFunctionReturn(0); 4541 } 4542 4543 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 4544 #undef __FUNCT__ 4545 #define __FUNCT__ "PCBDDCMatMult_Private" 4546 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 4547 { 4548 PCBDDCChange_ctx change_ctx; 4549 PetscErrorCode ierr; 4550 4551 PetscFunctionBegin; 4552 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4553 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4554 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4555 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4556 PetscFunctionReturn(0); 4557 } 4558 4559 #undef __FUNCT__ 4560 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 4561 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 4562 { 4563 PCBDDCChange_ctx change_ctx; 4564 PetscErrorCode ierr; 4565 4566 PetscFunctionBegin; 4567 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 4568 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 4569 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 4570 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 4571 PetscFunctionReturn(0); 4572 } 4573 4574 #undef __FUNCT__ 4575 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 4576 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 4577 { 4578 PC_IS *pcis=(PC_IS*)pc->data; 4579 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4580 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4581 Mat S_j; 4582 PetscInt *used_xadj,*used_adjncy; 4583 PetscBool free_used_adj; 4584 PetscErrorCode ierr; 4585 4586 PetscFunctionBegin; 4587 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 4588 free_used_adj = PETSC_FALSE; 4589 if (pcbddc->sub_schurs_layers == -1) { 4590 used_xadj = NULL; 4591 used_adjncy = NULL; 4592 } else { 4593 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 4594 used_xadj = pcbddc->mat_graph->xadj; 4595 used_adjncy = pcbddc->mat_graph->adjncy; 4596 } else if (pcbddc->computed_rowadj) { 4597 used_xadj = pcbddc->mat_graph->xadj; 4598 used_adjncy = pcbddc->mat_graph->adjncy; 4599 } else { 4600 PetscBool flg_row=PETSC_FALSE; 4601 const PetscInt *xadj,*adjncy; 4602 PetscInt nvtxs; 4603 4604 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4605 if (flg_row) { 4606 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 4607 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 4608 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 4609 free_used_adj = PETSC_TRUE; 4610 } else { 4611 pcbddc->sub_schurs_layers = -1; 4612 used_xadj = NULL; 4613 used_adjncy = NULL; 4614 } 4615 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4616 } 4617 } 4618 4619 /* setup sub_schurs data */ 4620 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 4621 if (!sub_schurs->use_mumps) { 4622 /* pcbddc->ksp_D up to date only if not using MUMPS */ 4623 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 4624 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4625 } else { 4626 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr); 4627 } 4628 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 4629 4630 /* free adjacency */ 4631 if (free_used_adj) { 4632 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 4633 } 4634 PetscFunctionReturn(0); 4635 } 4636 4637 #undef __FUNCT__ 4638 #define __FUNCT__ "PCBDDCInitSubSchurs" 4639 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 4640 { 4641 PC_IS *pcis=(PC_IS*)pc->data; 4642 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 4643 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 4644 PCBDDCGraph graph; 4645 PetscErrorCode ierr; 4646 4647 PetscFunctionBegin; 4648 /* attach interface graph for determining subsets */ 4649 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 4650 IS verticesIS; 4651 4652 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 4653 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 4654 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 4655 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr); 4656 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 4657 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 4658 /* 4659 if (pcbddc->dbg_flag) { 4660 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 4661 } 4662 */ 4663 } else { 4664 graph = pcbddc->mat_graph; 4665 } 4666 4667 /* sub_schurs init */ 4668 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 4669 4670 /* free graph struct */ 4671 if (pcbddc->sub_schurs_rebuild) { 4672 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 4673 } 4674 PetscFunctionReturn(0); 4675 } 4676