1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignCheck" 10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PC_IS* pcis = (PC_IS*)(pc->data); 14 IS dirIS = NULL; 15 PetscErrorCode ierr; 16 17 PetscFunctionBegin; 18 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 19 if (zerodiag) { 20 Mat A; 21 Vec vec3_N; 22 PetscScalar *vals; 23 const PetscInt *idxs; 24 PetscInt i,nz; 25 26 /* p0 */ 27 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 28 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 29 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 30 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 31 for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */ 32 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 33 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 34 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 35 /* v_I */ 36 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 37 for (i=0;i<nz;i++) vals[i] = 0.; 38 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 39 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 40 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 41 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 42 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 43 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 44 if (dirIS) { 45 PetscInt n; 46 47 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 48 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 49 for (i=0;i<n;i++) vals[i] = 0.; 50 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 51 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 52 } 53 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 54 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 55 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 56 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 57 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 58 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 59 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 60 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */ 61 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 62 } 63 ierr = PetscFree(vals);CHKERRQ(ierr); 64 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 65 } 66 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 67 68 /* check PCBDDCBenignGetOrSetP0 */ 69 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 70 pcbddc->benign_p0 = -PetscGlobalRank; 71 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 72 pcbddc->benign_p0 = 1; 73 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 74 if (pcbddc->benign_p0_gidx >=0 && pcbddc->benign_p0 != -PetscGlobalRank) { 75 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr); 76 } 77 PetscFunctionReturn(0); 78 } 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 82 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 83 { 84 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 85 IS pressures,zerodiag; 86 PetscInt nz,n; 87 PetscBool sorted,have_null; 88 PetscErrorCode ierr; 89 90 PetscFunctionBegin; 91 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 92 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 93 pcbddc->benign_original_mat = pcbddc->local_mat; 94 /* if a local info on dofs is present, assumes the last field is represented by "pressures", otherwise, uses only zerodiagonal dofs (ok if the pressure block is all zero) */ 95 if (pcbddc->n_ISForDofsLocal) { 96 PetscInt p = pcbddc->n_ISForDofsLocal-1; 97 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[p]);CHKERRQ(ierr); 98 pressures = pcbddc->ISForDofsLocal[p]; 99 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 100 if (!sorted) { 101 ierr = ISSort(pressures);CHKERRQ(ierr); 102 } 103 } else { 104 pressures = NULL; 105 } 106 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 107 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 108 if (!sorted) { 109 ierr = ISSort(zerodiag);CHKERRQ(ierr); 110 } 111 /* 112 Check if all the pressure dofs have a zero diagonal 113 If not, a change of basis on pressures is not needed 114 since the local Schur complements are SPD 115 */ 116 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 117 if (pressures) { 118 PetscInt np; 119 PetscBool sameis; 120 121 sameis = PETSC_FALSE; 122 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 123 if (nz == np) { 124 const PetscInt *idxs,*pidxs; 125 126 ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr); 127 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 128 ierr = PetscMemcmp(pidxs,idxs,np*sizeof(PetscInt),&sameis);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 130 ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr); 131 } 132 if (!sameis) { /* destroy index sets and set nz to 0 to avoid next code branch */ 133 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 134 nz = 0; 135 } 136 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 137 } 138 if (nz) { 139 IS zerodiagc; 140 PetscScalar *array; 141 const PetscInt *idxs,*idxsc; 142 PetscInt i,*nnz; 143 144 /* TODO: add check for shared dofs and raise error */ 145 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 146 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 147 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 148 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 149 /* local change of basis for pressures */ 150 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 151 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 152 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 153 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 154 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 155 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */ 156 for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */ 157 nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */ 158 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 159 ierr = PetscFree(nnz);CHKERRQ(ierr); 160 /* set identity on velocities */ 161 for (i=0;i<n-nz;i++) { 162 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 163 } 164 /* set change on pressures */ 165 for (i=0;i<nz-1;i++) { 166 PetscScalar vals[2]; 167 PetscInt cols[2]; 168 169 /* TODO: add quadrature */ 170 cols[0] = idxs[i]; 171 cols[1] = idxs[nz-1]; 172 vals[0] = 1.; 173 vals[1] = 1.; 174 ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 175 } 176 ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr); 177 for (i=0;i<nz-1;i++) array[i] = -1.; 178 array[nz-1] = 1.; 179 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 180 ierr = PetscFree(array);CHKERRQ(ierr); 181 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 183 /* TODO: need optimization? */ 184 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 185 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 186 /* store local and global idxs for p0 */ 187 pcbddc->benign_p0_lidx = idxs[nz-1]; 188 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 189 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 190 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 191 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 192 } else { /* if nz == 0, destroy local IS */ 193 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 194 } 195 196 /* determines if the coarse solver will be singular or not */ 197 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 198 have_null = PETSC_TRUE; 199 if (!zerodiag && n) have_null = PETSC_FALSE; 200 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 201 *zerodiaglocal = zerodiag; 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 207 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 208 { 209 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 210 PetscErrorCode ierr; 211 212 PetscFunctionBegin; 213 if (!pcbddc->benign_sf) { 214 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 215 if (pcbddc->benign_p0_gidx >= 0) { 216 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 217 } else { 218 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 219 } 220 } 221 if (get) { /* use SF to get values */ 222 PetscScalar *array; 223 224 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 225 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 226 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 227 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 228 } else { /* use VecSetValue */ 229 if (pcbddc->benign_p0_gidx >= 0) { 230 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 231 } 232 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 233 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 240 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 241 { 242 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 243 PetscErrorCode ierr; 244 245 PetscFunctionBegin; 246 /* TODO: add error checking 247 - avoid nested pop (or push) calls. 248 - cannot push before pop. 249 - cannot call this if pcbddc->local_mat is NULL 250 */ 251 if (pcbddc->benign_p0_lidx < 0) { 252 PetscFunctionReturn(0); 253 } 254 if (pop) { 255 const PetscInt *cB0_cols; 256 PetscInt cB0_ncol; 257 const PetscScalar *cB0_vals; 258 259 /* extract B_0 */ 260 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 261 pcbddc->B0_ncol = cB0_ncol; 262 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 263 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 264 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 266 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 267 /* remove rows and cols from local problem */ 268 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 269 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 270 } else { /* push */ 271 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 272 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 273 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 274 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 275 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "PCBDDCAdaptiveSelection" 282 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 283 { 284 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 285 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 286 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 287 PetscBLASInt *B_iwork,*B_ifail; 288 PetscScalar *work,lwork; 289 PetscScalar *St,*S,*eigv; 290 PetscScalar *Sarray,*Starray; 291 PetscReal *eigs,thresh; 292 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 293 PetscBool allocated_S_St; 294 #if defined(PETSC_USE_COMPLEX) 295 PetscReal *rwork; 296 #endif 297 PetscErrorCode ierr; 298 299 PetscFunctionBegin; 300 if (!sub_schurs->use_mumps) { 301 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 302 } 303 304 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 305 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 306 } 307 308 if (pcbddc->dbg_flag) { 309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 310 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 312 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 313 } 314 315 if (pcbddc->dbg_flag) { 316 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 317 } 318 319 /* max size of subsets */ 320 mss = 0; 321 for (i=0;i<sub_schurs->n_subs;i++) { 322 PetscInt subset_size; 323 324 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 325 mss = PetscMax(mss,subset_size); 326 } 327 328 /* min/max and threshold */ 329 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 330 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 331 nmax = PetscMax(nmin,nmax); 332 allocated_S_St = PETSC_FALSE; 333 if (nmin) { 334 allocated_S_St = PETSC_TRUE; 335 } 336 337 /* allocate lapack workspace */ 338 cum = cum2 = 0; 339 maxneigs = 0; 340 for (i=0;i<sub_schurs->n_subs;i++) { 341 PetscInt n,subset_size; 342 343 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 344 n = PetscMin(subset_size,nmax); 345 cum += subset_size; 346 cum2 += subset_size*n; 347 maxneigs = PetscMax(maxneigs,n); 348 } 349 if (mss) { 350 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 351 PetscBLASInt B_itype = 1; 352 PetscBLASInt B_N = mss; 353 PetscReal zero = 0.0; 354 PetscReal eps = 0.0; /* dlamch? */ 355 356 B_lwork = -1; 357 S = NULL; 358 St = NULL; 359 eigs = NULL; 360 eigv = NULL; 361 B_iwork = NULL; 362 B_ifail = NULL; 363 #if defined(PETSC_USE_COMPLEX) 364 rwork = NULL; 365 #endif 366 thresh = 1.0; 367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 368 #if defined(PETSC_USE_COMPLEX) 369 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 370 #else 371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 372 #endif 373 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 374 ierr = PetscFPTrapPop();CHKERRQ(ierr); 375 } else { 376 /* TODO */ 377 } 378 } else { 379 lwork = 0; 380 } 381 382 nv = 0; 383 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 384 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 385 } 386 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 387 if (allocated_S_St) { 388 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 389 } 390 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 391 #if defined(PETSC_USE_COMPLEX) 392 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 393 #endif 394 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 395 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 396 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 397 nv+cum,&pcbddc->adaptive_constraints_idxs, 398 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 399 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 400 401 maxneigs = 0; 402 cum = cum2 = cumarray = 0; 403 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 404 pcbddc->adaptive_constraints_data_ptr[0] = 0; 405 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 406 const PetscInt *idxs; 407 408 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 409 for (cum=0;cum<nv;cum++) { 410 pcbddc->adaptive_constraints_n[cum] = 1; 411 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 412 pcbddc->adaptive_constraints_data[cum] = 1.0; 413 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 414 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 415 } 416 cum2 = cum; 417 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 418 } 419 420 if (mss) { /* multilevel */ 421 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 422 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 423 } 424 425 for (i=0;i<sub_schurs->n_subs;i++) { 426 427 const PetscInt *idxs; 428 PetscReal infty = PETSC_MAX_REAL; 429 PetscInt j,subset_size,eigs_start = 0; 430 PetscBLASInt B_N; 431 PetscBool same_data = PETSC_FALSE; 432 433 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 434 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 435 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 436 if (sub_schurs->is_hermitian) { 437 PetscInt j,k; 438 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 439 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 440 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 441 } 442 for (j=0;j<subset_size;j++) { 443 for (k=j;k<subset_size;k++) { 444 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 445 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 446 } 447 } 448 } else { 449 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 450 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 451 } 452 } else { 453 S = Sarray + cumarray; 454 St = Starray + cumarray; 455 } 456 457 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 458 /* see if we can save some work */ 459 if (sub_schurs->n_subs == 1) { 460 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 461 } 462 463 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 464 B_neigs = 0; 465 } else { 466 /* Threshold: this is an heuristic for edges */ 467 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 468 469 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 470 PetscBLASInt B_itype = 1; 471 PetscBLASInt B_IL, B_IU; 472 PetscReal eps = -1.0; /* dlamch? */ 473 PetscInt nmin_s; 474 475 if (pcbddc->dbg_flag) { 476 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 477 } 478 479 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 480 if (thresh > 1.+PETSC_SMALL) { 481 482 /* ask for eigenvalues larger than thresh */ 483 #if defined(PETSC_USE_COMPLEX) 484 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 485 #else 486 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 487 #endif 488 } else { 489 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 490 B_IL = 1; 491 #if defined(PETSC_USE_COMPLEX) 492 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 493 #else 494 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 495 #endif 496 } 497 ierr = PetscFPTrapPop();CHKERRQ(ierr); 498 if (B_ierr) { 499 if (B_ierr < 0 ) { 500 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 501 } else if (B_ierr <= B_N) { 502 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 503 } else { 504 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 505 } 506 } 507 508 if (B_neigs > nmax) { 509 if (pcbddc->dbg_flag) { 510 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 511 } 512 eigs_start = B_neigs -nmax; 513 B_neigs = nmax; 514 } 515 516 nmin_s = PetscMin(nmin,B_N); 517 if (B_neigs < nmin_s) { 518 PetscBLASInt B_neigs2; 519 520 B_IU = B_N - B_neigs; 521 B_IL = B_N - nmin_s + 1; 522 if (pcbddc->dbg_flag) { 523 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 524 } 525 if (sub_schurs->is_hermitian) { 526 PetscInt j; 527 for (j=0;j<subset_size;j++) { 528 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 529 } 530 for (j=0;j<subset_size;j++) { 531 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 532 } 533 } else { 534 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 535 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 536 } 537 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 538 #if defined(PETSC_USE_COMPLEX) 539 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 540 #else 541 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 542 #endif 543 ierr = PetscFPTrapPop();CHKERRQ(ierr); 544 B_neigs += B_neigs2; 545 } 546 if (B_ierr) { 547 if (B_ierr < 0 ) { 548 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 549 } else if (B_ierr <= B_N) { 550 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 551 } else { 552 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 553 } 554 } 555 if (pcbddc->dbg_flag) { 556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 557 for (j=0;j<B_neigs;j++) { 558 if (eigs[j] == 0.0) { 559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 560 } else { 561 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 562 } 563 } 564 } 565 } else { 566 /* TODO */ 567 } 568 } 569 maxneigs = PetscMax(B_neigs,maxneigs); 570 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 571 if (B_neigs) { 572 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 573 574 if (pcbddc->dbg_flag > 1) { 575 PetscInt ii; 576 for (ii=0;ii<B_neigs;ii++) { 577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 578 for (j=0;j<B_N;j++) { 579 #if defined(PETSC_USE_COMPLEX) 580 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 581 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 583 #else 584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 585 #endif 586 } 587 } 588 } 589 #if 0 590 for (j=0;j<B_neigs;j++) { 591 PetscBLASInt Blas_N,Blas_one = 1.0; 592 PetscScalar norm; 593 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 594 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 595 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 596 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 597 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 598 } else { 599 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 600 } 601 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 602 } 603 #endif 604 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 605 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 606 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 607 cum++; 608 } 609 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 610 /* shift for next computation */ 611 cumarray += subset_size*subset_size; 612 } 613 if (pcbddc->dbg_flag) { 614 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 615 } 616 617 if (mss) { 618 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 619 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 620 /* destroy matrices (junk) */ 621 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 622 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 623 } 624 if (allocated_S_St) { 625 ierr = PetscFree2(S,St);CHKERRQ(ierr); 626 } 627 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 628 #if defined(PETSC_USE_COMPLEX) 629 ierr = PetscFree(rwork);CHKERRQ(ierr); 630 #endif 631 if (pcbddc->dbg_flag) { 632 PetscInt maxneigs_r; 633 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 635 } 636 PetscFunctionReturn(0); 637 } 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "PCBDDCSetUpSolvers" 641 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 642 { 643 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 644 PetscScalar *coarse_submat_vals; 645 PetscErrorCode ierr; 646 647 PetscFunctionBegin; 648 /* Setup local scatters R_to_B and (optionally) R_to_D */ 649 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 650 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 651 652 /* Setup local neumann solver ksp_R */ 653 /* PCBDDCSetUpLocalScatters should be called first! */ 654 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 655 656 /* Change global null space passed in by the user if change of basis has been requested */ 657 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 658 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 659 } 660 661 /* 662 Setup local correction and local part of coarse basis. 663 Gives back the dense local part of the coarse matrix in column major ordering 664 */ 665 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 666 667 /* Compute total number of coarse nodes and setup coarse solver */ 668 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 669 670 /* free */ 671 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "PCBDDCResetCustomization" 677 PetscErrorCode PCBDDCResetCustomization(PC pc) 678 { 679 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 680 PetscErrorCode ierr; 681 682 PetscFunctionBegin; 683 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 684 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 685 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 686 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 687 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 688 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 689 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 690 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 691 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 692 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 693 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "PCBDDCResetTopography" 699 PetscErrorCode PCBDDCResetTopography(PC pc) 700 { 701 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 702 PetscErrorCode ierr; 703 704 PetscFunctionBegin; 705 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 706 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 707 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 708 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 709 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "PCBDDCResetSolvers" 715 PetscErrorCode PCBDDCResetSolvers(PC pc) 716 { 717 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 718 PetscScalar *array; 719 PetscErrorCode ierr; 720 721 PetscFunctionBegin; 722 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 723 if (pcbddc->coarse_phi_B) { 724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 725 ierr = PetscFree(array);CHKERRQ(ierr); 726 } 727 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 728 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 729 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 730 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 731 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 732 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 733 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 734 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 735 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 736 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 737 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 738 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 739 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 740 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 741 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 742 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 743 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 744 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 745 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 746 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 747 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 748 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 749 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 750 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 751 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 752 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 753 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 754 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 760 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 761 { 762 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 763 PC_IS *pcis = (PC_IS*)pc->data; 764 VecType impVecType; 765 PetscInt n_constraints,n_R,old_size,n_benign; 766 PetscErrorCode ierr; 767 768 PetscFunctionBegin; 769 if (!pcbddc->ConstraintMatrix) { 770 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 771 } 772 /* get sizes */ 773 n_benign = 0; 774 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 775 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 776 n_R = pcis->n - pcbddc->n_vertices; 777 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 778 /* local work vectors (try to avoid unneeded work)*/ 779 /* R nodes */ 780 old_size = -1; 781 if (pcbddc->vec1_R) { 782 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 783 } 784 if (n_R != old_size) { 785 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 786 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 787 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 788 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 789 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 790 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 791 } 792 /* local primal dofs */ 793 old_size = -1; 794 if (pcbddc->vec1_P) { 795 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 796 } 797 if (pcbddc->local_primal_size != old_size) { 798 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 799 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 800 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 801 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 802 } 803 /* local explicit constraints */ 804 old_size = -1; 805 if (pcbddc->vec1_C) { 806 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 807 } 808 if (n_constraints && n_constraints != old_size) { 809 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 810 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 811 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 812 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 813 } 814 PetscFunctionReturn(0); 815 } 816 817 #undef __FUNCT__ 818 #define __FUNCT__ "PCBDDCSetUpCorrection" 819 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 820 { 821 PetscErrorCode ierr; 822 /* pointers to pcis and pcbddc */ 823 PC_IS* pcis = (PC_IS*)pc->data; 824 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 825 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 826 /* submatrices of local problem */ 827 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 828 /* submatrices of benign trick */ 829 Mat B0_V = NULL; 830 /* submatrices of local coarse problem */ 831 Mat S_VV,S_CV,S_VC,S_CC; 832 /* working matrices */ 833 Mat C_CR; 834 /* additional working stuff */ 835 PC pc_R; 836 Mat F,B0 = NULL; 837 PetscBool isLU,isCHOL,isILU; 838 839 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 840 PetscScalar *work; 841 PetscInt *idx_V_B; 842 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 843 PetscInt i,n_R,n_D,n_B; 844 PetscBool unsymmetric_check; 845 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 846 MatType impMatType; 847 /* some shortcuts to scalars */ 848 PetscScalar one=1.0,m_one=-1.0; 849 850 PetscFunctionBegin; 851 n_benign = 0; 852 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 853 n_vertices = pcbddc->n_vertices; 854 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 855 /* Set Non-overlapping dimensions */ 856 n_B = pcis->n_B; 857 n_D = pcis->n - n_B; 858 n_R = pcis->n - n_vertices; 859 860 /* Set types for local objects needed by BDDC precondtioner */ 861 impMatType = MATSEQDENSE; 862 863 /* vertices in boundary numbering */ 864 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 865 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 866 if (i != n_vertices) { 867 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 868 } 869 870 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 871 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 873 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 875 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 877 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 879 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 880 881 unsymmetric_check = PETSC_FALSE; 882 /* allocate workspace */ 883 n = 0; 884 if (n_constraints) { 885 n += n_R*n_constraints; 886 } 887 if (n_vertices) { 888 n = PetscMax(2*n_R*n_vertices,n); 889 n = PetscMax((n_R+n_B)*n_vertices,n); 890 } 891 if (!pcbddc->symmetric_primal) { 892 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 893 unsymmetric_check = PETSC_TRUE; 894 } 895 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 896 897 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 898 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 899 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 900 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 901 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 902 if (isLU || isILU || isCHOL) { 903 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 904 } else if (sub_schurs->reuse_mumps) { 905 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 906 MatFactorType type; 907 908 F = reuse_mumps->F; 909 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 910 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 911 } else { 912 F = NULL; 913 } 914 915 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 916 if (n_constraints) { 917 Mat M1,M2,M3; 918 Mat auxmat; 919 IS is_aux; 920 PetscScalar *array,*array2; 921 922 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 923 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 924 925 /* Extract constraints on R nodes: C_{CR} */ 926 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 927 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 928 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 929 930 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 931 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 932 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 933 for (i=0;i<n_constraints;i++) { 934 const PetscScalar *row_cmat_values; 935 const PetscInt *row_cmat_indices; 936 PetscInt size_of_constraint,j; 937 938 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 939 for (j=0;j<size_of_constraint;j++) { 940 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 941 } 942 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 943 } 944 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 945 if (F) { 946 Mat B; 947 948 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 949 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 950 ierr = MatDestroy(&B);CHKERRQ(ierr); 951 } else { 952 PetscScalar *marr; 953 954 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 955 for (i=0;i<n_constraints;i++) { 956 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 957 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 958 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 960 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 961 } 962 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 963 } 964 if (!pcbddc->switch_static) { 965 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 966 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 967 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 968 for (i=0;i<n_constraints;i++) { 969 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 970 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 971 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 972 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 974 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 975 } 976 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 977 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 978 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 979 } else { 980 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 981 pcbddc->local_auxmat2 = local_auxmat2_R; 982 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 983 } 984 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 985 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 986 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 987 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 988 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 989 if (isCHOL) { 990 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 991 } else { 992 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 993 } 994 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 995 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 996 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 997 ierr = MatDestroy(&M2);CHKERRQ(ierr); 998 ierr = MatDestroy(&M3);CHKERRQ(ierr); 999 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1000 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1001 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1002 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1003 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1004 } 1005 /* Get submatrices from subdomain matrix */ 1006 if (n_benign) { 1007 IS dummy; 1008 Mat B0_R; 1009 PetscReal norm; 1010 PetscInt ii[2]; 1011 1012 ii[0] = 0; 1013 ii[1] = pcbddc->B0_ncol; 1014 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 1015 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1016 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1017 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1018 if (norm > PETSC_SMALL) { 1019 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1020 } 1021 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1022 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1023 } 1024 1025 if (n_vertices) { 1026 IS is_aux; 1027 1028 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1029 IS tis; 1030 1031 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1032 ierr = ISSort(tis);CHKERRQ(ierr); 1033 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1034 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1035 } else { 1036 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1037 } 1038 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1040 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1041 if (n_benign) { 1042 IS dummy; 1043 1044 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1045 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1046 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1047 } 1048 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1049 } 1050 1051 /* Matrix of coarse basis functions (local) */ 1052 if (pcbddc->coarse_phi_B) { 1053 PetscInt on_B,on_primal,on_D=n_D; 1054 if (pcbddc->coarse_phi_D) { 1055 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1056 } 1057 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1058 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1059 PetscScalar *marray; 1060 1061 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1062 ierr = PetscFree(marray);CHKERRQ(ierr); 1063 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1064 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1065 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1066 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1067 } 1068 } 1069 1070 if (!pcbddc->coarse_phi_B) { 1071 PetscScalar *marray; 1072 1073 n = n_B*pcbddc->local_primal_size; 1074 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1075 n += n_D*pcbddc->local_primal_size; 1076 } 1077 if (!pcbddc->symmetric_primal) { 1078 n *= 2; 1079 } 1080 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1081 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1082 n = n_B*pcbddc->local_primal_size; 1083 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1084 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1085 n += n_D*pcbddc->local_primal_size; 1086 } 1087 if (!pcbddc->symmetric_primal) { 1088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1089 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1090 n = n_B*pcbddc->local_primal_size; 1091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1092 } 1093 } else { 1094 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1095 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1096 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1097 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1098 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1099 } 1100 } 1101 } 1102 1103 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1104 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1105 const PetscInt *idxs; 1106 1107 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1108 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 1109 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1110 } 1111 1112 /* vertices */ 1113 if (n_vertices) { 1114 1115 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1116 1117 if (n_R) { 1118 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1119 PetscBLASInt B_N,B_one = 1; 1120 PetscScalar *x,*y; 1121 PetscBool isseqaij; 1122 1123 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1124 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1125 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1126 if (F) { /* TODO could be optimized for symmetric problems */ 1127 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1128 } else { 1129 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1130 for (i=0;i<n_vertices;i++) { 1131 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1132 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1133 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1134 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1135 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1136 } 1137 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1138 } 1139 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1140 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1141 if (n_constraints) { 1142 Mat B; 1143 1144 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1145 for (i=0;i<n_vertices;i++) { 1146 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1147 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1148 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1151 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1152 } 1153 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1154 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1155 ierr = MatDestroy(&B);CHKERRQ(ierr); 1156 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1157 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1158 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1159 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1160 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1161 ierr = MatDestroy(&B);CHKERRQ(ierr); 1162 } 1163 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1164 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1165 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1166 } 1167 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1168 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1169 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1170 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1171 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1172 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1173 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1174 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1175 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1176 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1177 } else { 1178 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1179 } 1180 if (n_benign) { 1181 const PetscScalar *vals; 1182 const PetscInt *idxs; 1183 PetscInt n,j; 1184 1185 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1186 for (j=0;j<n;j++) { 1187 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1188 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 1189 } 1190 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1191 } 1192 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1193 1194 /* coarse basis functions */ 1195 for (i=0;i<n_vertices;i++) { 1196 PetscScalar *y; 1197 1198 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1199 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1200 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1201 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1202 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1203 y[n_B*i+idx_V_B[i]] = 1.0; 1204 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1205 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1206 1207 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1208 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1209 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1210 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1211 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1212 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1213 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1214 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1215 } 1216 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1217 } 1218 /* if n_R == 0 the object is not destroyed */ 1219 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1220 } 1221 1222 if (n_constraints) { 1223 Mat B; 1224 1225 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1226 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1227 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1228 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1229 if (n_vertices) { 1230 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1231 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1232 } else { 1233 Mat S_VCt; 1234 1235 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1236 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1237 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1238 } 1239 } 1240 ierr = MatDestroy(&B);CHKERRQ(ierr); 1241 /* coarse basis functions */ 1242 for (i=0;i<n_constraints;i++) { 1243 PetscScalar *y; 1244 1245 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1246 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1247 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1248 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1249 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1250 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1251 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1252 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1253 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1254 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1255 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1258 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1259 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1260 } 1261 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1262 } 1263 } 1264 if (n_constraints) { 1265 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1266 } 1267 1268 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1269 1270 /* compute other basis functions for non-symmetric problems */ 1271 if (!pcbddc->symmetric_primal) { 1272 1273 if (n_constraints) { 1274 Mat S_CCT,B_C; 1275 1276 /* this is a lazy thing */ 1277 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1278 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1279 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1280 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1281 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1282 if (n_vertices) { 1283 Mat B_V,S_VCT; 1284 1285 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1286 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1287 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1288 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1289 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1290 } 1291 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1292 } else { /* if there are no constraints, reset work */ 1293 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1294 } 1295 if (n_vertices && n_R) { 1296 Mat A_VRT; 1297 PetscScalar *marray; 1298 PetscBLASInt B_N,B_one = 1; 1299 1300 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1301 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1302 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1303 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1304 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1305 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1306 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1307 } 1308 1309 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1310 for (i=0;i<pcbddc->local_primal_size;i++) { 1311 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1312 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1313 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1314 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1315 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1316 } 1317 } else { 1318 for (i=0;i<pcbddc->local_primal_size;i++) { 1319 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1320 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1321 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1322 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1323 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1324 } 1325 } 1326 /* coarse basis functions */ 1327 for (i=0;i<pcbddc->local_primal_size;i++) { 1328 PetscScalar *y; 1329 1330 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1331 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1332 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1333 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1334 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1335 if (i<n_vertices) { 1336 y[n_B*i+idx_V_B[i]] = 1.0; 1337 } 1338 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1339 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1340 1341 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1342 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1343 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1344 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1346 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1347 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1348 } 1349 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1350 } 1351 } 1352 /* free memory */ 1353 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1354 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1355 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1356 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1357 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1358 ierr = PetscFree(work);CHKERRQ(ierr); 1359 if (n_vertices) { 1360 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1361 } 1362 if (n_constraints) { 1363 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1364 } 1365 /* Checking coarse_sub_mat and coarse basis functios */ 1366 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1367 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1368 if (pcbddc->dbg_flag) { 1369 Mat coarse_sub_mat; 1370 Mat AUXMAT,TM1,TM2,TM3,TM4; 1371 Mat coarse_phi_D,coarse_phi_B; 1372 Mat coarse_psi_D,coarse_psi_B; 1373 Mat A_II,A_BB,A_IB,A_BI; 1374 Mat C_B,CPHI; 1375 IS is_dummy; 1376 Vec mones; 1377 MatType checkmattype=MATSEQAIJ; 1378 PetscReal real_value; 1379 1380 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1381 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1382 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1383 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1384 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1385 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1386 if (unsymmetric_check) { 1387 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1388 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1389 } 1390 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1391 1392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1394 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1395 if (unsymmetric_check) { 1396 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1397 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1398 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1399 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1400 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1401 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1402 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1403 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1404 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1405 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1406 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1407 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1408 } else { 1409 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1410 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1411 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1412 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1413 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1414 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1415 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1416 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1417 } 1418 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1419 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1420 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1421 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1422 if (n_benign) { 1423 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1424 PetscScalar *data,*data2; 1425 1426 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1427 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1428 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1429 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1430 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1431 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1432 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1433 for (i=0;i<pcbddc->local_primal_size;i++) { 1434 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1435 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1436 } 1437 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1438 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1439 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1440 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1441 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1442 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1443 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1444 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1445 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1446 } 1447 #if 0 1448 { 1449 PetscViewer viewer; 1450 char filename[256]; 1451 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1452 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1453 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1454 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1455 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1456 } 1457 #endif 1458 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1459 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1462 1463 /* check constraints */ 1464 if (!n_benign) { 1465 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1466 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1467 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1468 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1469 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1470 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1471 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1473 if (unsymmetric_check) { 1474 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1475 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1476 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1477 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1479 } 1480 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1481 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1482 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1483 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1484 } 1485 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1486 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1487 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1488 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1489 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1490 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1491 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1492 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1493 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1494 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1495 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1496 if (unsymmetric_check) { 1497 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1498 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1499 } 1500 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1501 } 1502 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1503 /* get back data */ 1504 *coarse_submat_vals_n = coarse_submat_vals; 1505 PetscFunctionReturn(0); 1506 } 1507 1508 #undef __FUNCT__ 1509 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1510 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1511 { 1512 Mat *work_mat; 1513 IS isrow_s,iscol_s; 1514 PetscBool rsorted,csorted; 1515 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1516 PetscErrorCode ierr; 1517 1518 PetscFunctionBegin; 1519 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1520 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1521 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1522 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1523 1524 if (!rsorted) { 1525 const PetscInt *idxs; 1526 PetscInt *idxs_sorted,i; 1527 1528 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1529 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1530 for (i=0;i<rsize;i++) { 1531 idxs_perm_r[i] = i; 1532 } 1533 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1534 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1535 for (i=0;i<rsize;i++) { 1536 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1537 } 1538 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1539 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1540 } else { 1541 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1542 isrow_s = isrow; 1543 } 1544 1545 if (!csorted) { 1546 if (isrow == iscol) { 1547 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1548 iscol_s = isrow_s; 1549 } else { 1550 const PetscInt *idxs; 1551 PetscInt *idxs_sorted,i; 1552 1553 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1554 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1555 for (i=0;i<csize;i++) { 1556 idxs_perm_c[i] = i; 1557 } 1558 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1559 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1560 for (i=0;i<csize;i++) { 1561 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1562 } 1563 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1564 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1565 } 1566 } else { 1567 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1568 iscol_s = iscol; 1569 } 1570 1571 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1572 1573 if (!rsorted || !csorted) { 1574 Mat new_mat; 1575 IS is_perm_r,is_perm_c; 1576 1577 if (!rsorted) { 1578 PetscInt *idxs_r,i; 1579 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1580 for (i=0;i<rsize;i++) { 1581 idxs_r[idxs_perm_r[i]] = i; 1582 } 1583 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1584 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1585 } else { 1586 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1587 } 1588 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1589 1590 if (!csorted) { 1591 if (isrow_s == iscol_s) { 1592 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1593 is_perm_c = is_perm_r; 1594 } else { 1595 PetscInt *idxs_c,i; 1596 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1597 for (i=0;i<csize;i++) { 1598 idxs_c[idxs_perm_c[i]] = i; 1599 } 1600 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1601 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1602 } 1603 } else { 1604 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1605 } 1606 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1607 1608 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1609 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1610 work_mat[0] = new_mat; 1611 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1612 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1613 } 1614 1615 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1616 *B = work_mat[0]; 1617 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1618 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1619 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1625 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1626 { 1627 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1628 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1629 Mat new_mat; 1630 IS is_local,is_global; 1631 PetscInt local_size; 1632 PetscBool isseqaij; 1633 PetscErrorCode ierr; 1634 1635 PetscFunctionBegin; 1636 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1637 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1638 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1639 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1640 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1641 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1642 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1643 1644 /* check */ 1645 if (pcbddc->dbg_flag) { 1646 Vec x,x_change; 1647 PetscReal error; 1648 1649 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1650 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1651 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1652 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1653 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1657 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1658 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1659 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1660 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1661 ierr = VecDestroy(&x);CHKERRQ(ierr); 1662 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1663 } 1664 1665 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1666 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1667 if (isseqaij) { 1668 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1669 } else { 1670 Mat work_mat; 1671 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1672 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1673 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1674 } 1675 if (matis->A->symmetric_set) { 1676 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1677 #if !defined(PETSC_USE_COMPLEX) 1678 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1679 #endif 1680 } 1681 /* 1682 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1683 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1684 */ 1685 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1691 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1692 { 1693 PC_IS* pcis = (PC_IS*)(pc->data); 1694 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1695 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1696 PetscInt *idx_R_local=NULL; 1697 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1698 PetscInt vbs,bs; 1699 PetscBT bitmask=NULL; 1700 PetscErrorCode ierr; 1701 1702 PetscFunctionBegin; 1703 /* 1704 No need to setup local scatters if 1705 - primal space is unchanged 1706 AND 1707 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1708 AND 1709 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1710 */ 1711 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1712 PetscFunctionReturn(0); 1713 } 1714 /* destroy old objects */ 1715 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1716 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1717 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1718 /* Set Non-overlapping dimensions */ 1719 n_B = pcis->n_B; 1720 n_D = pcis->n - n_B; 1721 n_vertices = pcbddc->n_vertices; 1722 1723 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1724 1725 /* create auxiliary bitmask and allocate workspace */ 1726 if (!sub_schurs->reuse_mumps) { 1727 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1728 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1729 for (i=0;i<n_vertices;i++) { 1730 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1731 } 1732 1733 for (i=0, n_R=0; i<pcis->n; i++) { 1734 if (!PetscBTLookup(bitmask,i)) { 1735 idx_R_local[n_R++] = i; 1736 } 1737 } 1738 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1739 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1740 1741 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1742 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1743 } 1744 1745 /* Block code */ 1746 vbs = 1; 1747 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1748 if (bs>1 && !(n_vertices%bs)) { 1749 PetscBool is_blocked = PETSC_TRUE; 1750 PetscInt *vary; 1751 if (!sub_schurs->reuse_mumps) { 1752 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1753 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1754 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1755 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1756 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1757 for (i=0; i<pcis->n/bs; i++) { 1758 if (vary[i]!=0 && vary[i]!=bs) { 1759 is_blocked = PETSC_FALSE; 1760 break; 1761 } 1762 } 1763 ierr = PetscFree(vary);CHKERRQ(ierr); 1764 } else { 1765 /* Verify directly the R set */ 1766 for (i=0; i<n_R/bs; i++) { 1767 PetscInt j,node=idx_R_local[bs*i]; 1768 for (j=1; j<bs; j++) { 1769 if (node != idx_R_local[bs*i+j]-j) { 1770 is_blocked = PETSC_FALSE; 1771 break; 1772 } 1773 } 1774 } 1775 } 1776 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1777 vbs = bs; 1778 for (i=0;i<n_R/vbs;i++) { 1779 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1780 } 1781 } 1782 } 1783 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1784 if (sub_schurs->reuse_mumps) { 1785 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1786 1787 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1788 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1789 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1790 reuse_mumps->is_R = pcbddc->is_R_local; 1791 } else { 1792 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1793 } 1794 1795 /* print some info if requested */ 1796 if (pcbddc->dbg_flag) { 1797 PetscInt benign = 0; 1798 1799 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1800 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1801 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1802 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1805 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1806 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1807 } 1808 1809 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1810 if (!sub_schurs->reuse_mumps) { 1811 IS is_aux1,is_aux2; 1812 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1813 1814 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1816 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1817 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1818 for (i=0; i<n_D; i++) { 1819 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1820 } 1821 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1822 for (i=0, j=0; i<n_R; i++) { 1823 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1824 aux_array1[j++] = i; 1825 } 1826 } 1827 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1828 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1829 for (i=0, j=0; i<n_B; i++) { 1830 if (!PetscBTLookup(bitmask,is_indices[i])) { 1831 aux_array2[j++] = i; 1832 } 1833 } 1834 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1835 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1836 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1837 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1838 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1839 1840 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1841 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1842 for (i=0, j=0; i<n_R; i++) { 1843 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1844 aux_array1[j++] = i; 1845 } 1846 } 1847 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1848 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1849 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1850 } 1851 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1852 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1853 } else { 1854 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1855 IS tis; 1856 PetscInt schur_size; 1857 1858 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1859 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1860 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1861 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1862 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1863 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1864 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1865 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1866 } 1867 } 1868 PetscFunctionReturn(0); 1869 } 1870 1871 1872 #undef __FUNCT__ 1873 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1874 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1875 { 1876 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1877 PC_IS *pcis = (PC_IS*)pc->data; 1878 PC pc_temp; 1879 Mat A_RR; 1880 MatReuse reuse; 1881 PetscScalar m_one = -1.0; 1882 PetscReal value; 1883 PetscInt n_D,n_R; 1884 PetscBool use_exact,use_exact_reduced,issbaij; 1885 PetscErrorCode ierr; 1886 /* prefixes stuff */ 1887 char dir_prefix[256],neu_prefix[256],str_level[16]; 1888 size_t len; 1889 1890 PetscFunctionBegin; 1891 1892 /* compute prefixes */ 1893 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1894 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1895 if (!pcbddc->current_level) { 1896 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1897 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1898 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1899 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1900 } else { 1901 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1902 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1903 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1904 len -= 15; /* remove "pc_bddc_coarse_" */ 1905 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1906 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1907 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1908 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1909 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1910 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1911 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1912 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1913 } 1914 1915 /* DIRICHLET PROBLEM */ 1916 if (dirichlet) { 1917 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1918 if (pcbddc->local_mat->symmetric_set) { 1919 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1920 } 1921 /* Matrix for Dirichlet problem is pcis->A_II */ 1922 n_D = pcis->n - pcis->n_B; 1923 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1924 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1925 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1926 /* default */ 1927 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1928 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1929 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1930 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1931 if (issbaij) { 1932 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1933 } else { 1934 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1935 } 1936 /* Allow user's customization */ 1937 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1938 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1939 } 1940 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1941 if (sub_schurs->reuse_mumps) { 1942 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1943 1944 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1945 } 1946 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1947 if (!n_D) { 1948 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1949 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1950 } 1951 /* Set Up KSP for Dirichlet problem of BDDC */ 1952 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1953 /* set ksp_D into pcis data */ 1954 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1955 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1956 pcis->ksp_D = pcbddc->ksp_D; 1957 } 1958 1959 /* NEUMANN PROBLEM */ 1960 A_RR = 0; 1961 if (neumann) { 1962 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1963 PetscInt ibs,mbs; 1964 PetscBool issbaij; 1965 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1966 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1967 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1968 if (pcbddc->ksp_R) { /* already created ksp */ 1969 PetscInt nn_R; 1970 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1971 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1972 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1973 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1974 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1975 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1976 reuse = MAT_INITIAL_MATRIX; 1977 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1978 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1979 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1980 reuse = MAT_INITIAL_MATRIX; 1981 } else { /* safe to reuse the matrix */ 1982 reuse = MAT_REUSE_MATRIX; 1983 } 1984 } 1985 /* last check */ 1986 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1987 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1988 reuse = MAT_INITIAL_MATRIX; 1989 } 1990 } else { /* first time, so we need to create the matrix */ 1991 reuse = MAT_INITIAL_MATRIX; 1992 } 1993 /* extract A_RR */ 1994 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1995 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1996 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1997 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1998 if (matis->A == pcbddc->local_mat) { 1999 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2000 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2001 } else { 2002 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2003 } 2004 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2005 if (matis->A == pcbddc->local_mat) { 2006 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2007 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2008 } else { 2009 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2010 } 2011 } 2012 if (!sub_schurs->reuse_mumps) { 2013 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2014 if (pcbddc->local_mat->symmetric_set) { 2015 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2016 } 2017 } else { 2018 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2019 2020 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2021 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2022 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2023 } 2024 if (!pcbddc->ksp_R) { /* create object if not present */ 2025 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2026 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2027 /* default */ 2028 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2029 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2030 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2031 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2032 if (issbaij) { 2033 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2034 } else { 2035 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2036 } 2037 /* Allow user's customization */ 2038 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2039 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2040 } 2041 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2042 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2043 if (!n_R) { 2044 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2045 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2046 } 2047 /* Reuse MUMPS solver if it is present */ 2048 if (sub_schurs->reuse_mumps) { 2049 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2050 2051 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2052 } 2053 /* Set Up KSP for Neumann problem of BDDC */ 2054 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2055 } 2056 /* free Neumann problem's matrix */ 2057 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2058 2059 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2060 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2061 if (pcbddc->dbg_flag) { 2062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2063 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2065 } 2066 if (dirichlet) { /* Dirichlet */ 2067 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2068 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2069 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2070 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2071 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2072 /* need to be adapted? */ 2073 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2074 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2075 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2076 /* print info */ 2077 if (pcbddc->dbg_flag) { 2078 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2079 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2080 } 2081 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2082 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2083 } 2084 } 2085 if (neumann) { /* Neumann */ 2086 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2087 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2088 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2089 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2090 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2091 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2092 /* need to be adapted? */ 2093 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2094 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2095 /* print info */ 2096 if (pcbddc->dbg_flag) { 2097 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2099 } 2100 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2101 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2102 } 2103 } 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2110 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2111 { 2112 PetscErrorCode ierr; 2113 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2114 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2115 2116 PetscFunctionBegin; 2117 if (!sub_schurs->reuse_mumps) { 2118 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2119 } 2120 if (!pcbddc->switch_static) { 2121 if (applytranspose && pcbddc->local_auxmat1) { 2122 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2123 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2124 } 2125 if (!sub_schurs->reuse_mumps) { 2126 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2127 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2128 } else { 2129 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2130 2131 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2132 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2133 } 2134 } else { 2135 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2136 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2137 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2138 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2139 if (applytranspose && pcbddc->local_auxmat1) { 2140 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2141 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2142 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2143 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2144 } 2145 } 2146 if (!sub_schurs->reuse_mumps) { 2147 if (applytranspose) { 2148 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2149 } else { 2150 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2151 } 2152 #if defined(PETSC_HAVE_MUMPS) 2153 } else { 2154 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2155 2156 if (applytranspose) { 2157 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2158 } else { 2159 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2160 } 2161 #endif 2162 } 2163 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2164 if (!pcbddc->switch_static) { 2165 if (!sub_schurs->reuse_mumps) { 2166 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2167 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2168 } else { 2169 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2170 2171 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2172 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2173 } 2174 if (!applytranspose && pcbddc->local_auxmat1) { 2175 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2176 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2177 } 2178 } else { 2179 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2180 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2181 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2182 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 if (!applytranspose && pcbddc->local_auxmat1) { 2184 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2185 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2186 } 2187 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2188 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2189 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2190 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2191 } 2192 PetscFunctionReturn(0); 2193 } 2194 2195 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2198 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2199 { 2200 PetscErrorCode ierr; 2201 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2202 PC_IS* pcis = (PC_IS*) (pc->data); 2203 const PetscScalar zero = 0.0; 2204 2205 PetscFunctionBegin; 2206 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2207 if (applytranspose) { 2208 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2209 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2210 } else { 2211 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2212 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2213 } 2214 2215 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2216 if (pcbddc->benign_p0_lidx >= 0) { 2217 PetscScalar *array; 2218 2219 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2220 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2221 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2222 } 2223 2224 /* start communications from local primal nodes to rhs of coarse solver */ 2225 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2226 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2227 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2228 2229 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2230 /* TODO remove null space when doing multilevel */ 2231 if (pcbddc->coarse_ksp) { 2232 Vec rhs,sol; 2233 2234 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2235 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2236 if (applytranspose) { 2237 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2238 } else { 2239 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2240 } 2241 } 2242 2243 /* Local solution on R nodes */ 2244 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2245 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2246 } 2247 2248 /* communications from coarse sol to local primal nodes */ 2249 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2250 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2251 2252 /* Sum contributions from two levels */ 2253 if (applytranspose) { 2254 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2255 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2256 } else { 2257 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2258 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2259 } 2260 /* store p0 */ 2261 if (pcbddc->benign_p0_lidx >= 0) { 2262 PetscScalar *array; 2263 2264 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2265 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2266 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2267 } 2268 PetscFunctionReturn(0); 2269 } 2270 2271 #undef __FUNCT__ 2272 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2273 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2274 { 2275 PetscErrorCode ierr; 2276 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2277 PetscScalar *array; 2278 Vec from,to; 2279 2280 PetscFunctionBegin; 2281 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2282 from = pcbddc->coarse_vec; 2283 to = pcbddc->vec1_P; 2284 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2285 Vec tvec; 2286 2287 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2288 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2289 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2290 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2291 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2292 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2293 } 2294 } else { /* from local to global -> put data in coarse right hand side */ 2295 from = pcbddc->vec1_P; 2296 to = pcbddc->coarse_vec; 2297 } 2298 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2304 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2305 { 2306 PetscErrorCode ierr; 2307 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2308 PetscScalar *array; 2309 Vec from,to; 2310 2311 PetscFunctionBegin; 2312 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2313 from = pcbddc->coarse_vec; 2314 to = pcbddc->vec1_P; 2315 } else { /* from local to global -> put data in coarse right hand side */ 2316 from = pcbddc->vec1_P; 2317 to = pcbddc->coarse_vec; 2318 } 2319 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2320 if (smode == SCATTER_FORWARD) { 2321 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2322 Vec tvec; 2323 2324 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2325 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2326 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2327 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2328 } 2329 } else { 2330 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2331 ierr = VecResetArray(from);CHKERRQ(ierr); 2332 } 2333 } 2334 PetscFunctionReturn(0); 2335 } 2336 2337 /* uncomment for testing purposes */ 2338 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2341 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2342 { 2343 PetscErrorCode ierr; 2344 PC_IS* pcis = (PC_IS*)(pc->data); 2345 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2346 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2347 /* one and zero */ 2348 PetscScalar one=1.0,zero=0.0; 2349 /* space to store constraints and their local indices */ 2350 PetscScalar *constraints_data; 2351 PetscInt *constraints_idxs,*constraints_idxs_B; 2352 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2353 PetscInt *constraints_n; 2354 /* iterators */ 2355 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2356 /* BLAS integers */ 2357 PetscBLASInt lwork,lierr; 2358 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2359 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2360 /* reuse */ 2361 PetscInt olocal_primal_size,olocal_primal_size_cc; 2362 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2363 /* change of basis */ 2364 PetscBool qr_needed; 2365 PetscBT change_basis,qr_needed_idx; 2366 /* auxiliary stuff */ 2367 PetscInt *nnz,*is_indices; 2368 PetscInt ncc; 2369 /* some quantities */ 2370 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2371 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2372 2373 PetscFunctionBegin; 2374 /* Destroy Mat objects computed previously */ 2375 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2376 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2377 /* save info on constraints from previous setup (if any) */ 2378 olocal_primal_size = pcbddc->local_primal_size; 2379 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2380 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2381 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2382 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2383 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2384 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2385 2386 /* print some info */ 2387 if (pcbddc->dbg_flag) { 2388 IS vertices; 2389 PetscInt nv,nedges,nfaces; 2390 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2391 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2392 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2393 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2395 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2398 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2399 } 2400 2401 if (!pcbddc->adaptive_selection) { 2402 IS ISForVertices,*ISForFaces,*ISForEdges; 2403 MatNullSpace nearnullsp; 2404 const Vec *nearnullvecs; 2405 Vec *localnearnullsp; 2406 PetscScalar *array; 2407 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2408 PetscBool nnsp_has_cnst; 2409 /* LAPACK working arrays for SVD or POD */ 2410 PetscBool skip_lapack,boolforchange; 2411 PetscScalar *work; 2412 PetscReal *singular_vals; 2413 #if defined(PETSC_USE_COMPLEX) 2414 PetscReal *rwork; 2415 #endif 2416 #if defined(PETSC_MISSING_LAPACK_GESVD) 2417 PetscScalar *temp_basis,*correlation_mat; 2418 #else 2419 PetscBLASInt dummy_int=1; 2420 PetscScalar dummy_scalar=1.; 2421 #endif 2422 2423 /* Get index sets for faces, edges and vertices from graph */ 2424 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2425 /* free unneeded index sets */ 2426 if (!pcbddc->use_vertices) { 2427 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2428 } 2429 if (!pcbddc->use_edges) { 2430 for (i=0;i<n_ISForEdges;i++) { 2431 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2432 } 2433 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2434 n_ISForEdges = 0; 2435 } 2436 if (!pcbddc->use_faces) { 2437 for (i=0;i<n_ISForFaces;i++) { 2438 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2439 } 2440 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2441 n_ISForFaces = 0; 2442 } 2443 2444 #if defined(PETSC_USE_DEBUG) 2445 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2446 Also use_change_of_basis should be consistent among processors */ 2447 if (pcbddc->NullSpace) { 2448 PetscBool tbool[2],gbool[2]; 2449 2450 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2451 pcbddc->use_change_of_basis = PETSC_TRUE; 2452 if (!ISForEdges) { 2453 pcbddc->use_change_on_faces = PETSC_TRUE; 2454 } 2455 } 2456 tbool[0] = pcbddc->use_change_of_basis; 2457 tbool[1] = pcbddc->use_change_on_faces; 2458 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2459 pcbddc->use_change_of_basis = gbool[0]; 2460 pcbddc->use_change_on_faces = gbool[1]; 2461 } 2462 #endif 2463 2464 /* check if near null space is attached to global mat */ 2465 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2466 if (nearnullsp) { 2467 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2468 /* remove any stored info */ 2469 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2470 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2471 /* store information for BDDC solver reuse */ 2472 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2473 pcbddc->onearnullspace = nearnullsp; 2474 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2475 for (i=0;i<nnsp_size;i++) { 2476 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2477 } 2478 } else { /* if near null space is not provided BDDC uses constants by default */ 2479 nnsp_size = 0; 2480 nnsp_has_cnst = PETSC_TRUE; 2481 } 2482 /* get max number of constraints on a single cc */ 2483 max_constraints = nnsp_size; 2484 if (nnsp_has_cnst) max_constraints++; 2485 2486 /* 2487 Evaluate maximum storage size needed by the procedure 2488 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2489 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2490 There can be multiple constraints per connected component 2491 */ 2492 n_vertices = 0; 2493 if (ISForVertices) { 2494 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2495 } 2496 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2497 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2498 2499 total_counts = n_ISForFaces+n_ISForEdges; 2500 total_counts *= max_constraints; 2501 total_counts += n_vertices; 2502 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2503 2504 total_counts = 0; 2505 max_size_of_constraint = 0; 2506 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2507 IS used_is; 2508 if (i<n_ISForEdges) { 2509 used_is = ISForEdges[i]; 2510 } else { 2511 used_is = ISForFaces[i-n_ISForEdges]; 2512 } 2513 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2514 total_counts += j; 2515 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2516 } 2517 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2518 2519 /* get local part of global near null space vectors */ 2520 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2521 for (k=0;k<nnsp_size;k++) { 2522 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2523 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2525 } 2526 2527 /* whether or not to skip lapack calls */ 2528 skip_lapack = PETSC_TRUE; 2529 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2530 2531 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2532 if (!skip_lapack) { 2533 PetscScalar temp_work; 2534 2535 #if defined(PETSC_MISSING_LAPACK_GESVD) 2536 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2537 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2538 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2539 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2540 #if defined(PETSC_USE_COMPLEX) 2541 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2542 #endif 2543 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2544 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2545 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2546 lwork = -1; 2547 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2548 #if !defined(PETSC_USE_COMPLEX) 2549 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2550 #else 2551 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2552 #endif 2553 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2554 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2555 #else /* on missing GESVD */ 2556 /* SVD */ 2557 PetscInt max_n,min_n; 2558 max_n = max_size_of_constraint; 2559 min_n = max_constraints; 2560 if (max_size_of_constraint < max_constraints) { 2561 min_n = max_size_of_constraint; 2562 max_n = max_constraints; 2563 } 2564 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2565 #if defined(PETSC_USE_COMPLEX) 2566 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2567 #endif 2568 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2569 lwork = -1; 2570 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2571 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2572 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2573 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2574 #if !defined(PETSC_USE_COMPLEX) 2575 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2576 #else 2577 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2578 #endif 2579 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2580 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2581 #endif /* on missing GESVD */ 2582 /* Allocate optimal workspace */ 2583 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2584 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2585 } 2586 /* Now we can loop on constraining sets */ 2587 total_counts = 0; 2588 constraints_idxs_ptr[0] = 0; 2589 constraints_data_ptr[0] = 0; 2590 /* vertices */ 2591 if (n_vertices) { 2592 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2593 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2594 for (i=0;i<n_vertices;i++) { 2595 constraints_n[total_counts] = 1; 2596 constraints_data[total_counts] = 1.0; 2597 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2598 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2599 total_counts++; 2600 } 2601 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2602 n_vertices = total_counts; 2603 } 2604 2605 /* edges and faces */ 2606 total_counts_cc = total_counts; 2607 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2608 IS used_is; 2609 PetscBool idxs_copied = PETSC_FALSE; 2610 2611 if (ncc<n_ISForEdges) { 2612 used_is = ISForEdges[ncc]; 2613 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2614 } else { 2615 used_is = ISForFaces[ncc-n_ISForEdges]; 2616 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2617 } 2618 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2619 2620 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2621 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2622 /* change of basis should not be performed on local periodic nodes */ 2623 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2624 if (nnsp_has_cnst) { 2625 PetscScalar quad_value; 2626 2627 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2628 idxs_copied = PETSC_TRUE; 2629 2630 if (!pcbddc->use_nnsp_true) { 2631 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2632 } else { 2633 quad_value = 1.0; 2634 } 2635 for (j=0;j<size_of_constraint;j++) { 2636 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2637 } 2638 temp_constraints++; 2639 total_counts++; 2640 } 2641 for (k=0;k<nnsp_size;k++) { 2642 PetscReal real_value; 2643 PetscScalar *ptr_to_data; 2644 2645 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2646 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2647 for (j=0;j<size_of_constraint;j++) { 2648 ptr_to_data[j] = array[is_indices[j]]; 2649 } 2650 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2651 /* check if array is null on the connected component */ 2652 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2653 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2654 if (real_value > 0.0) { /* keep indices and values */ 2655 temp_constraints++; 2656 total_counts++; 2657 if (!idxs_copied) { 2658 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2659 idxs_copied = PETSC_TRUE; 2660 } 2661 } 2662 } 2663 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2664 valid_constraints = temp_constraints; 2665 if (!pcbddc->use_nnsp_true && temp_constraints) { 2666 if (temp_constraints == 1) { /* just normalize the constraint */ 2667 PetscScalar norm,*ptr_to_data; 2668 2669 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2670 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2671 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2672 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2673 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2674 } else { /* perform SVD */ 2675 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2676 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2677 2678 #if defined(PETSC_MISSING_LAPACK_GESVD) 2679 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2680 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2681 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2682 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2683 from that computed using LAPACKgesvd 2684 -> This is due to a different computation of eigenvectors in LAPACKheev 2685 -> The quality of the POD-computed basis will be the same */ 2686 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2687 /* Store upper triangular part of correlation matrix */ 2688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2689 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2690 for (j=0;j<temp_constraints;j++) { 2691 for (k=0;k<j+1;k++) { 2692 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2693 } 2694 } 2695 /* compute eigenvalues and eigenvectors of correlation matrix */ 2696 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2697 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2698 #if !defined(PETSC_USE_COMPLEX) 2699 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2700 #else 2701 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2702 #endif 2703 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2704 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2705 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2706 j = 0; 2707 while (j < temp_constraints && singular_vals[j] < tol) j++; 2708 total_counts = total_counts-j; 2709 valid_constraints = temp_constraints-j; 2710 /* scale and copy POD basis into used quadrature memory */ 2711 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2712 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2713 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2714 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2715 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2716 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2717 if (j<temp_constraints) { 2718 PetscInt ii; 2719 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2720 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2721 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2722 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2723 for (k=0;k<temp_constraints-j;k++) { 2724 for (ii=0;ii<size_of_constraint;ii++) { 2725 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2726 } 2727 } 2728 } 2729 #else /* on missing GESVD */ 2730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2731 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2732 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2733 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2734 #if !defined(PETSC_USE_COMPLEX) 2735 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2736 #else 2737 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2738 #endif 2739 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2740 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2741 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2742 k = temp_constraints; 2743 if (k > size_of_constraint) k = size_of_constraint; 2744 j = 0; 2745 while (j < k && singular_vals[k-j-1] < tol) j++; 2746 valid_constraints = k-j; 2747 total_counts = total_counts-temp_constraints+valid_constraints; 2748 #endif /* on missing GESVD */ 2749 } 2750 } 2751 /* update pointers information */ 2752 if (valid_constraints) { 2753 constraints_n[total_counts_cc] = valid_constraints; 2754 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2755 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2756 /* set change_of_basis flag */ 2757 if (boolforchange) { 2758 PetscBTSet(change_basis,total_counts_cc); 2759 } 2760 total_counts_cc++; 2761 } 2762 } 2763 /* free workspace */ 2764 if (!skip_lapack) { 2765 ierr = PetscFree(work);CHKERRQ(ierr); 2766 #if defined(PETSC_USE_COMPLEX) 2767 ierr = PetscFree(rwork);CHKERRQ(ierr); 2768 #endif 2769 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2770 #if defined(PETSC_MISSING_LAPACK_GESVD) 2771 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2772 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2773 #endif 2774 } 2775 for (k=0;k<nnsp_size;k++) { 2776 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2777 } 2778 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2779 /* free index sets of faces, edges and vertices */ 2780 for (i=0;i<n_ISForFaces;i++) { 2781 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2782 } 2783 if (n_ISForFaces) { 2784 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2785 } 2786 for (i=0;i<n_ISForEdges;i++) { 2787 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2788 } 2789 if (n_ISForEdges) { 2790 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2791 } 2792 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2793 } else { 2794 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2795 2796 total_counts = 0; 2797 n_vertices = 0; 2798 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2799 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2800 } 2801 max_constraints = 0; 2802 total_counts_cc = 0; 2803 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2804 total_counts += pcbddc->adaptive_constraints_n[i]; 2805 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2806 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2807 } 2808 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2809 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2810 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2811 constraints_data = pcbddc->adaptive_constraints_data; 2812 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2813 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2814 total_counts_cc = 0; 2815 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2816 if (pcbddc->adaptive_constraints_n[i]) { 2817 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2818 } 2819 } 2820 #if 0 2821 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2822 for (i=0;i<total_counts_cc;i++) { 2823 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2824 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2825 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2826 printf(" %d",constraints_idxs[j]); 2827 } 2828 printf("\n"); 2829 printf("number of cc: %d\n",constraints_n[i]); 2830 } 2831 for (i=0;i<n_vertices;i++) { 2832 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2833 } 2834 for (i=0;i<sub_schurs->n_subs;i++) { 2835 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2836 } 2837 #endif 2838 2839 max_size_of_constraint = 0; 2840 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2841 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2842 /* Change of basis */ 2843 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2844 if (pcbddc->use_change_of_basis) { 2845 for (i=0;i<sub_schurs->n_subs;i++) { 2846 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2847 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2848 } 2849 } 2850 } 2851 } 2852 pcbddc->local_primal_size = total_counts; 2853 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2854 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2855 2856 /* map constraints_idxs in boundary numbering */ 2857 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2858 if (i != constraints_idxs_ptr[total_counts_cc]) { 2859 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2860 } 2861 2862 /* Create constraint matrix */ 2863 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2864 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2865 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2866 2867 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2868 /* determine if a QR strategy is needed for change of basis */ 2869 qr_needed = PETSC_FALSE; 2870 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2871 total_primal_vertices=0; 2872 pcbddc->local_primal_size_cc = 0; 2873 for (i=0;i<total_counts_cc;i++) { 2874 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2875 if (size_of_constraint == 1) { 2876 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2877 pcbddc->local_primal_size_cc += 1; 2878 } else if (PetscBTLookup(change_basis,i)) { 2879 for (k=0;k<constraints_n[i];k++) { 2880 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2881 } 2882 pcbddc->local_primal_size_cc += constraints_n[i]; 2883 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2884 PetscBTSet(qr_needed_idx,i); 2885 qr_needed = PETSC_TRUE; 2886 } 2887 } else { 2888 pcbddc->local_primal_size_cc += 1; 2889 } 2890 } 2891 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2892 pcbddc->n_vertices = total_primal_vertices; 2893 /* permute indices in order to have a sorted set of vertices */ 2894 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2895 2896 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2897 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2898 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2899 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2900 2901 /* nonzero structure of constraint matrix */ 2902 /* and get reference dof for local constraints */ 2903 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2904 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2905 2906 j = total_primal_vertices; 2907 total_counts = total_primal_vertices; 2908 cum = total_primal_vertices; 2909 for (i=n_vertices;i<total_counts_cc;i++) { 2910 if (!PetscBTLookup(change_basis,i)) { 2911 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2912 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2913 cum++; 2914 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2915 for (k=0;k<constraints_n[i];k++) { 2916 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2917 nnz[j+k] = size_of_constraint; 2918 } 2919 j += constraints_n[i]; 2920 } 2921 } 2922 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2923 ierr = PetscFree(nnz);CHKERRQ(ierr); 2924 2925 /* set values in constraint matrix */ 2926 for (i=0;i<total_primal_vertices;i++) { 2927 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2928 } 2929 total_counts = total_primal_vertices; 2930 for (i=n_vertices;i<total_counts_cc;i++) { 2931 if (!PetscBTLookup(change_basis,i)) { 2932 PetscInt *cols; 2933 2934 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2935 cols = constraints_idxs+constraints_idxs_ptr[i]; 2936 for (k=0;k<constraints_n[i];k++) { 2937 PetscInt row = total_counts+k; 2938 PetscScalar *vals; 2939 2940 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2941 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2942 } 2943 total_counts += constraints_n[i]; 2944 } 2945 } 2946 /* assembling */ 2947 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2949 2950 /* 2951 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2952 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2953 */ 2954 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2955 if (pcbddc->use_change_of_basis) { 2956 /* dual and primal dofs on a single cc */ 2957 PetscInt dual_dofs,primal_dofs; 2958 /* working stuff for GEQRF */ 2959 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2960 PetscBLASInt lqr_work; 2961 /* working stuff for UNGQR */ 2962 PetscScalar *gqr_work,lgqr_work_t; 2963 PetscBLASInt lgqr_work; 2964 /* working stuff for TRTRS */ 2965 PetscScalar *trs_rhs; 2966 PetscBLASInt Blas_NRHS; 2967 /* pointers for values insertion into change of basis matrix */ 2968 PetscInt *start_rows,*start_cols; 2969 PetscScalar *start_vals; 2970 /* working stuff for values insertion */ 2971 PetscBT is_primal; 2972 PetscInt *aux_primal_numbering_B; 2973 /* matrix sizes */ 2974 PetscInt global_size,local_size; 2975 /* temporary change of basis */ 2976 Mat localChangeOfBasisMatrix; 2977 /* extra space for debugging */ 2978 PetscScalar *dbg_work; 2979 2980 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2981 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2982 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2983 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2984 /* nonzeros for local mat */ 2985 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2986 for (i=0;i<pcis->n;i++) nnz[i]=1; 2987 for (i=n_vertices;i<total_counts_cc;i++) { 2988 if (PetscBTLookup(change_basis,i)) { 2989 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2990 if (PetscBTLookup(qr_needed_idx,i)) { 2991 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2992 } else { 2993 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2994 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2995 } 2996 } 2997 } 2998 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2999 ierr = PetscFree(nnz);CHKERRQ(ierr); 3000 /* Set initial identity in the matrix */ 3001 for (i=0;i<pcis->n;i++) { 3002 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3003 } 3004 3005 if (pcbddc->dbg_flag) { 3006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3008 } 3009 3010 3011 /* Now we loop on the constraints which need a change of basis */ 3012 /* 3013 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3014 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3015 3016 Basic blocks of change of basis matrix T computed by 3017 3018 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3019 3020 | 1 0 ... 0 s_1/S | 3021 | 0 1 ... 0 s_2/S | 3022 | ... | 3023 | 0 ... 1 s_{n-1}/S | 3024 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3025 3026 with S = \sum_{i=1}^n s_i^2 3027 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3028 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3029 3030 - QR decomposition of constraints otherwise 3031 */ 3032 if (qr_needed) { 3033 /* space to store Q */ 3034 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3035 /* first we issue queries for optimal work */ 3036 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3037 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3038 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3039 lqr_work = -1; 3040 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3041 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3042 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3043 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3044 lgqr_work = -1; 3045 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3046 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3047 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3048 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3049 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3050 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3051 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3052 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3053 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3054 /* array to store scaling factors for reflectors */ 3055 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3056 /* array to store rhs and solution of triangular solver */ 3057 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3058 /* allocating workspace for check */ 3059 if (pcbddc->dbg_flag) { 3060 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3061 } 3062 } 3063 /* array to store whether a node is primal or not */ 3064 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3065 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3066 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3067 if (i != total_primal_vertices) { 3068 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3069 } 3070 for (i=0;i<total_primal_vertices;i++) { 3071 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3072 } 3073 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3074 3075 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3076 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3077 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3078 if (PetscBTLookup(change_basis,total_counts)) { 3079 /* get constraint info */ 3080 primal_dofs = constraints_n[total_counts]; 3081 dual_dofs = size_of_constraint-primal_dofs; 3082 3083 if (pcbddc->dbg_flag) { 3084 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3085 } 3086 3087 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3088 3089 /* copy quadrature constraints for change of basis check */ 3090 if (pcbddc->dbg_flag) { 3091 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3092 } 3093 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3094 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3095 3096 /* compute QR decomposition of constraints */ 3097 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3098 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3099 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3100 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3101 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3102 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3103 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3104 3105 /* explictly compute R^-T */ 3106 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3107 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3108 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3109 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3110 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3111 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3112 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3113 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3114 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3115 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3116 3117 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3118 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3119 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3120 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3121 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3122 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3123 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3124 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3125 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3126 3127 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3128 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3129 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3130 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3131 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3132 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3133 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3134 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3135 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3136 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3137 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3138 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3139 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3140 3141 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3142 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3143 /* insert cols for primal dofs */ 3144 for (j=0;j<primal_dofs;j++) { 3145 start_vals = &qr_basis[j*size_of_constraint]; 3146 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3147 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3148 } 3149 /* insert cols for dual dofs */ 3150 for (j=0,k=0;j<dual_dofs;k++) { 3151 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3152 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3153 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3154 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3155 j++; 3156 } 3157 } 3158 3159 /* check change of basis */ 3160 if (pcbddc->dbg_flag) { 3161 PetscInt ii,jj; 3162 PetscBool valid_qr=PETSC_TRUE; 3163 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3164 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3165 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3167 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3168 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3169 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3170 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3171 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3172 for (jj=0;jj<size_of_constraint;jj++) { 3173 for (ii=0;ii<primal_dofs;ii++) { 3174 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3175 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3176 } 3177 } 3178 if (!valid_qr) { 3179 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3180 for (jj=0;jj<size_of_constraint;jj++) { 3181 for (ii=0;ii<primal_dofs;ii++) { 3182 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3183 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3184 } 3185 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3186 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3187 } 3188 } 3189 } 3190 } else { 3191 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3192 } 3193 } 3194 } else { /* simple transformation block */ 3195 PetscInt row,col; 3196 PetscScalar val,norm; 3197 3198 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3199 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3200 for (j=0;j<size_of_constraint;j++) { 3201 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3202 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3203 if (!PetscBTLookup(is_primal,row_B)) { 3204 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3205 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3206 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3207 } else { 3208 for (k=0;k<size_of_constraint;k++) { 3209 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3210 if (row != col) { 3211 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3212 } else { 3213 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3214 } 3215 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3216 } 3217 } 3218 } 3219 if (pcbddc->dbg_flag) { 3220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3221 } 3222 } 3223 } else { 3224 if (pcbddc->dbg_flag) { 3225 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3226 } 3227 } 3228 } 3229 3230 /* free workspace */ 3231 if (qr_needed) { 3232 if (pcbddc->dbg_flag) { 3233 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3234 } 3235 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3236 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3237 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3238 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3239 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3240 } 3241 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3242 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3243 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3244 3245 /* assembling of global change of variable */ 3246 { 3247 Mat tmat; 3248 PetscInt bs; 3249 3250 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3251 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3252 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3253 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3254 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3255 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3256 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3257 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3258 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3259 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3260 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3261 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3262 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3263 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3264 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3265 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3266 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3267 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3268 } 3269 /* check */ 3270 if (pcbddc->dbg_flag) { 3271 PetscReal error; 3272 Vec x,x_change; 3273 3274 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3275 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3276 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3277 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3278 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3279 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3280 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3281 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3282 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3283 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3284 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3285 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3286 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3287 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3288 ierr = VecDestroy(&x);CHKERRQ(ierr); 3289 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3290 } 3291 3292 /* adapt sub_schurs computed (if any) */ 3293 if (pcbddc->use_deluxe_scaling) { 3294 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3295 if (sub_schurs->S_Ej_all) { 3296 Mat S_new,tmat; 3297 ISLocalToGlobalMapping NtoSall; 3298 IS is_all_N,is_V,is_V_Sall; 3299 const PetscScalar *array; 3300 const PetscInt *idxs_V,*idxs_all; 3301 PetscInt i,n_V; 3302 3303 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3304 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3305 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3306 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3307 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3308 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3309 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3310 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3311 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3312 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3313 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3314 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3315 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3316 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3317 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3318 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3319 for (i=0;i<n_V;i++) { 3320 PetscScalar val; 3321 PetscInt idx; 3322 3323 idx = idxs_V[i]; 3324 val = array[idxs_all[idxs_V[i]]]; 3325 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3326 } 3327 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 sub_schurs->S_Ej_all = S_new; 3330 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3331 if (sub_schurs->sum_S_Ej_all) { 3332 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3333 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3334 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3335 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3336 sub_schurs->sum_S_Ej_all = S_new; 3337 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3338 } 3339 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3340 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3341 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3342 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3343 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3344 } 3345 } 3346 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3347 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3348 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3349 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3350 } 3351 3352 /* set up change of basis context */ 3353 if (pcbddc->ChangeOfBasisMatrix) { 3354 PCBDDCChange_ctx change_ctx; 3355 3356 if (!pcbddc->new_global_mat) { 3357 PetscInt global_size,local_size; 3358 3359 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3360 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3361 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3362 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3363 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3364 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3365 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3366 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3367 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3368 } else { 3369 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3370 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3371 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3372 } 3373 if (!pcbddc->user_ChangeOfBasisMatrix) { 3374 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3375 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3376 } else { 3377 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3378 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3379 } 3380 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3381 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3382 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3383 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3384 } 3385 3386 /* add pressure dof to set of primal nodes for numbering purposes */ 3387 if (pcbddc->benign_p0_lidx >= 0) { 3388 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3389 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3390 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3391 pcbddc->local_primal_size_cc++; 3392 pcbddc->local_primal_size++; 3393 } 3394 3395 /* check if a new primal space has been introduced (also take into account benign trick) */ 3396 pcbddc->new_primal_space_local = PETSC_TRUE; 3397 if (olocal_primal_size == pcbddc->local_primal_size) { 3398 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3399 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3400 if (!pcbddc->new_primal_space_local) { 3401 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3402 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3403 } 3404 } 3405 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3406 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3407 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3408 3409 /* flush dbg viewer */ 3410 if (pcbddc->dbg_flag) { 3411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3412 } 3413 3414 /* free workspace */ 3415 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3416 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3417 if (!pcbddc->adaptive_selection) { 3418 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3419 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3420 } else { 3421 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3422 pcbddc->adaptive_constraints_idxs_ptr, 3423 pcbddc->adaptive_constraints_data_ptr, 3424 pcbddc->adaptive_constraints_idxs, 3425 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3426 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3427 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3428 } 3429 PetscFunctionReturn(0); 3430 } 3431 3432 #undef __FUNCT__ 3433 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3434 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3435 { 3436 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3437 PC_IS *pcis = (PC_IS*)pc->data; 3438 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3439 PetscInt ierr,i,vertex_size,N; 3440 PetscViewer viewer=pcbddc->dbg_viewer; 3441 3442 PetscFunctionBegin; 3443 /* Reset previously computed graph */ 3444 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3445 /* Init local Graph struct */ 3446 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3447 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3448 3449 /* Check validity of the csr graph passed in by the user */ 3450 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3451 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3452 } 3453 3454 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3455 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3456 PetscInt *xadj,*adjncy; 3457 PetscInt nvtxs; 3458 PetscBool flg_row=PETSC_FALSE; 3459 3460 if (pcbddc->use_local_adj) { 3461 3462 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3463 if (flg_row) { 3464 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3465 pcbddc->computed_rowadj = PETSC_TRUE; 3466 } 3467 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3468 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3469 IS is_dummy; 3470 ISLocalToGlobalMapping l2gmap_dummy; 3471 PetscInt j,sum; 3472 PetscInt *cxadj,*cadjncy; 3473 const PetscInt *idxs; 3474 PCBDDCGraph graph; 3475 PetscBT is_on_boundary; 3476 3477 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3478 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3479 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3480 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3481 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3482 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3483 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3484 if (flg_row) { 3485 graph->xadj = xadj; 3486 graph->adjncy = adjncy; 3487 } 3488 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3489 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3490 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3491 3492 if (pcbddc->dbg_flag) { 3493 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3494 for (i=0;i<graph->ncc;i++) { 3495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3496 } 3497 } 3498 3499 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3500 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3501 for (i=0;i<pcis->n_B;i++) { 3502 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3503 } 3504 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3505 3506 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3507 sum = 0; 3508 for (i=0;i<graph->ncc;i++) { 3509 PetscInt sizecc = 0; 3510 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3511 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3512 sizecc++; 3513 } 3514 } 3515 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3516 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3517 cxadj[graph->queue[j]] = sizecc; 3518 } 3519 } 3520 sum += sizecc*sizecc; 3521 } 3522 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3523 sum = 0; 3524 for (i=0;i<pcis->n;i++) { 3525 PetscInt temp = cxadj[i]; 3526 cxadj[i] = sum; 3527 sum += temp; 3528 } 3529 cxadj[pcis->n] = sum; 3530 for (i=0;i<graph->ncc;i++) { 3531 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3532 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3533 PetscInt k,sizecc = 0; 3534 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3535 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3536 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3537 sizecc++; 3538 } 3539 } 3540 } 3541 } 3542 } 3543 if (sum) { 3544 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3545 } else { 3546 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3547 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3548 } 3549 graph->xadj = 0; 3550 graph->adjncy = 0; 3551 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3552 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3553 } 3554 } 3555 if (pcbddc->dbg_flag) { 3556 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3557 } 3558 3559 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3560 vertex_size = 1; 3561 if (pcbddc->user_provided_isfordofs) { 3562 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3563 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3564 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3565 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3566 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3567 } 3568 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3569 pcbddc->n_ISForDofs = 0; 3570 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3571 } 3572 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3573 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3574 } else { 3575 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3576 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3577 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3578 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3579 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3580 } 3581 } 3582 } 3583 3584 /* Setup of Graph */ 3585 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3586 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3587 } 3588 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3589 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3590 } 3591 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3592 3593 /* Graph's connected components analysis */ 3594 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3595 3596 /* print some info to stdout */ 3597 if (pcbddc->dbg_flag) { 3598 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3599 } 3600 3601 /* mark topography has done */ 3602 pcbddc->recompute_topography = PETSC_FALSE; 3603 PetscFunctionReturn(0); 3604 } 3605 3606 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3607 #undef __FUNCT__ 3608 #define __FUNCT__ "PCBDDCSubsetNumbering" 3609 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3610 { 3611 PetscSF sf; 3612 PetscLayout map; 3613 const PetscInt *idxs; 3614 PetscInt *leaf_data,*root_data,*gidxs; 3615 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3616 PetscInt n_n,nlocals,start,first_index; 3617 PetscMPIInt commsize; 3618 PetscBool first_found; 3619 PetscErrorCode ierr; 3620 3621 PetscFunctionBegin; 3622 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3623 if (subset_mult) { 3624 PetscCheckSameComm(subset,1,subset_mult,2); 3625 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3626 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3627 } 3628 /* create workspace layout for computing global indices of subset */ 3629 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3630 lbounds[0] = lbounds[1] = 0; 3631 for (i=0;i<n;i++) { 3632 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3633 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3634 } 3635 lbounds[0] = -lbounds[0]; 3636 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3637 gbounds[0] = -gbounds[0]; 3638 N = gbounds[1] - gbounds[0] + 1; 3639 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3640 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3641 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3642 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3643 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3644 3645 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3646 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3647 if (subset_mult) { 3648 const PetscInt* idxs_mult; 3649 3650 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3651 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3652 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3653 } else { 3654 for (i=0;i<n;i++) leaf_data[i] = 1; 3655 } 3656 /* local size of new subset */ 3657 n_n = 0; 3658 for (i=0;i<n;i++) n_n += leaf_data[i]; 3659 3660 /* global indexes in layout */ 3661 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3662 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3663 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3664 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3665 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3666 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3667 3668 /* reduce from leaves to roots */ 3669 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3670 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3671 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3672 3673 /* count indexes in local part of layout */ 3674 nlocals = 0; 3675 first_index = -1; 3676 first_found = PETSC_FALSE; 3677 for (i=0;i<Nl;i++) { 3678 if (!first_found && root_data[i]) { 3679 first_found = PETSC_TRUE; 3680 first_index = i; 3681 } 3682 nlocals += root_data[i]; 3683 } 3684 3685 /* cumulative of number of indexes and size of subset without holes */ 3686 #if defined(PETSC_HAVE_MPI_EXSCAN) 3687 start = 0; 3688 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3689 #else 3690 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3691 start = start-nlocals; 3692 #endif 3693 3694 if (N_n) { /* compute total size of new subset if requested */ 3695 *N_n = start + nlocals; 3696 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3697 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3698 } 3699 3700 /* adapt root data with cumulative */ 3701 if (first_found) { 3702 PetscInt old_index; 3703 3704 root_data[first_index] += start; 3705 old_index = first_index; 3706 for (i=first_index+1;i<Nl;i++) { 3707 if (root_data[i]) { 3708 root_data[i] += root_data[old_index]; 3709 old_index = i; 3710 } 3711 } 3712 } 3713 3714 /* from roots to leaves */ 3715 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3716 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3717 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3718 3719 /* create new IS with global indexes without holes */ 3720 if (subset_mult) { 3721 const PetscInt* idxs_mult; 3722 PetscInt cum; 3723 3724 cum = 0; 3725 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3726 for (i=0;i<n;i++) { 3727 PetscInt j; 3728 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3729 } 3730 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3731 } else { 3732 for (i=0;i<n;i++) { 3733 gidxs[i] = leaf_data[i]-1; 3734 } 3735 } 3736 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3737 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3738 PetscFunctionReturn(0); 3739 } 3740 3741 #undef __FUNCT__ 3742 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3743 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3744 { 3745 PetscInt i,j; 3746 PetscScalar *alphas; 3747 PetscErrorCode ierr; 3748 3749 PetscFunctionBegin; 3750 /* this implements stabilized Gram-Schmidt */ 3751 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3752 for (i=0;i<n;i++) { 3753 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3754 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3755 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3756 } 3757 ierr = PetscFree(alphas);CHKERRQ(ierr); 3758 PetscFunctionReturn(0); 3759 } 3760 3761 #undef __FUNCT__ 3762 #define __FUNCT__ "MatISGetSubassemblingPattern" 3763 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3764 { 3765 IS ranks_send_to; 3766 PetscInt n_neighs,*neighs,*n_shared,**shared; 3767 PetscMPIInt size,rank,color; 3768 PetscInt *xadj,*adjncy; 3769 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3770 PetscInt i,local_size,threshold=0; 3771 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3772 PetscSubcomm subcomm; 3773 PetscErrorCode ierr; 3774 3775 PetscFunctionBegin; 3776 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3777 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3778 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3779 3780 /* Get info on mapping */ 3781 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3782 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3783 3784 /* build local CSR graph of subdomains' connectivity */ 3785 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3786 xadj[0] = 0; 3787 xadj[1] = PetscMax(n_neighs-1,0); 3788 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3789 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3790 3791 if (threshold) { 3792 PetscInt xadj_count = 0; 3793 for (i=1;i<n_neighs;i++) { 3794 if (n_shared[i] > threshold) { 3795 adjncy[xadj_count] = neighs[i]; 3796 adjncy_wgt[xadj_count] = n_shared[i]; 3797 xadj_count++; 3798 } 3799 } 3800 xadj[1] = xadj_count; 3801 } else { 3802 if (xadj[1]) { 3803 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3804 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3805 } 3806 } 3807 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3808 if (use_square) { 3809 for (i=0;i<xadj[1];i++) { 3810 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3811 } 3812 } 3813 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3814 3815 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3816 3817 /* 3818 Restrict work on active processes only. 3819 */ 3820 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3821 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3822 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3823 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3824 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3825 if (color) { 3826 ierr = PetscFree(xadj);CHKERRQ(ierr); 3827 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3828 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3829 } else { 3830 Mat subdomain_adj; 3831 IS new_ranks,new_ranks_contig; 3832 MatPartitioning partitioner; 3833 PetscInt prank,rstart=0,rend=0; 3834 PetscInt *is_indices,*oldranks; 3835 PetscBool aggregate; 3836 3837 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3838 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3839 prank = rank; 3840 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3841 /* 3842 for (i=0;i<size;i++) { 3843 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3844 } 3845 */ 3846 for (i=0;i<xadj[1];i++) { 3847 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3848 } 3849 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3850 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3851 if (aggregate) { 3852 PetscInt lrows,row,ncols,*cols; 3853 PetscMPIInt nrank; 3854 PetscScalar *vals; 3855 3856 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3857 lrows = 0; 3858 if (nrank<redprocs) { 3859 lrows = size/redprocs; 3860 if (nrank<size%redprocs) lrows++; 3861 } 3862 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3863 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3864 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3865 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3866 row = nrank; 3867 ncols = xadj[1]-xadj[0]; 3868 cols = adjncy; 3869 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3870 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3871 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3872 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 ierr = PetscFree(xadj);CHKERRQ(ierr); 3875 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3876 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3877 ierr = PetscFree(vals);CHKERRQ(ierr); 3878 } else { 3879 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3880 } 3881 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3882 3883 /* Partition */ 3884 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3885 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3886 if (use_vwgt) { 3887 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3888 v_wgt[0] = local_size; 3889 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3890 } 3891 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3892 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3893 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3894 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3895 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3896 3897 /* renumber new_ranks to avoid "holes" in new set of processors */ 3898 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3899 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3900 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3901 if (!redprocs) { 3902 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3903 } else { 3904 PetscInt idxs[1]; 3905 PetscMPIInt tag; 3906 MPI_Request *reqs; 3907 3908 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3909 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3910 for (i=rstart;i<rend;i++) { 3911 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3912 } 3913 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3914 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3915 ierr = PetscFree(reqs);CHKERRQ(ierr); 3916 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3917 } 3918 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3919 /* clean up */ 3920 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3921 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3922 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3923 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3924 } 3925 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3926 3927 /* assemble parallel IS for sends */ 3928 i = 1; 3929 if (color) i=0; 3930 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3931 /* get back IS */ 3932 *is_sends = ranks_send_to; 3933 PetscFunctionReturn(0); 3934 } 3935 3936 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3937 3938 #undef __FUNCT__ 3939 #define __FUNCT__ "MatISSubassemble" 3940 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3941 { 3942 Mat local_mat; 3943 IS is_sends_internal; 3944 PetscInt rows,cols,new_local_rows; 3945 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3946 PetscBool ismatis,isdense,newisdense,destroy_mat; 3947 ISLocalToGlobalMapping l2gmap; 3948 PetscInt* l2gmap_indices; 3949 const PetscInt* is_indices; 3950 MatType new_local_type; 3951 /* buffers */ 3952 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3953 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3954 PetscInt *recv_buffer_idxs_local; 3955 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3956 /* MPI */ 3957 MPI_Comm comm,comm_n; 3958 PetscSubcomm subcomm; 3959 PetscMPIInt n_sends,n_recvs,commsize; 3960 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3961 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3962 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3963 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3964 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3965 PetscErrorCode ierr; 3966 3967 PetscFunctionBegin; 3968 /* TODO: add missing checks */ 3969 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3970 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3971 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3972 PetscValidLogicalCollectiveInt(mat,nis,7); 3973 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3974 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3975 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3976 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3977 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3978 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3979 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3980 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3981 PetscInt mrows,mcols,mnrows,mncols; 3982 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3983 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3984 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3985 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3986 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3987 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3988 } 3989 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3990 PetscValidLogicalCollectiveInt(mat,bs,0); 3991 /* prepare IS for sending if not provided */ 3992 if (!is_sends) { 3993 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3994 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3995 } else { 3996 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3997 is_sends_internal = is_sends; 3998 } 3999 4000 /* get comm */ 4001 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4002 4003 /* compute number of sends */ 4004 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4005 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4006 4007 /* compute number of receives */ 4008 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4009 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4010 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4011 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4012 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4013 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4014 ierr = PetscFree(iflags);CHKERRQ(ierr); 4015 4016 /* restrict comm if requested */ 4017 subcomm = 0; 4018 destroy_mat = PETSC_FALSE; 4019 if (restrict_comm) { 4020 PetscMPIInt color,subcommsize; 4021 4022 color = 0; 4023 if (restrict_full) { 4024 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4025 } else { 4026 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4027 } 4028 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4029 subcommsize = commsize - subcommsize; 4030 /* check if reuse has been requested */ 4031 if (reuse == MAT_REUSE_MATRIX) { 4032 if (*mat_n) { 4033 PetscMPIInt subcommsize2; 4034 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4035 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4036 comm_n = PetscObjectComm((PetscObject)*mat_n); 4037 } else { 4038 comm_n = PETSC_COMM_SELF; 4039 } 4040 } else { /* MAT_INITIAL_MATRIX */ 4041 PetscMPIInt rank; 4042 4043 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4044 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4045 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4046 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4047 comm_n = PetscSubcommChild(subcomm); 4048 } 4049 /* flag to destroy *mat_n if not significative */ 4050 if (color) destroy_mat = PETSC_TRUE; 4051 } else { 4052 comm_n = comm; 4053 } 4054 4055 /* prepare send/receive buffers */ 4056 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4057 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4058 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4059 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4060 if (nis) { 4061 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4062 } 4063 4064 /* Get data from local matrices */ 4065 if (!isdense) { 4066 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4067 /* TODO: See below some guidelines on how to prepare the local buffers */ 4068 /* 4069 send_buffer_vals should contain the raw values of the local matrix 4070 send_buffer_idxs should contain: 4071 - MatType_PRIVATE type 4072 - PetscInt size_of_l2gmap 4073 - PetscInt global_row_indices[size_of_l2gmap] 4074 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4075 */ 4076 } else { 4077 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4078 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4079 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4080 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4081 send_buffer_idxs[1] = i; 4082 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4083 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4084 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4085 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4086 for (i=0;i<n_sends;i++) { 4087 ilengths_vals[is_indices[i]] = len*len; 4088 ilengths_idxs[is_indices[i]] = len+2; 4089 } 4090 } 4091 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4092 /* additional is (if any) */ 4093 if (nis) { 4094 PetscMPIInt psum; 4095 PetscInt j; 4096 for (j=0,psum=0;j<nis;j++) { 4097 PetscInt plen; 4098 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4099 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4100 psum += len+1; /* indices + lenght */ 4101 } 4102 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4103 for (j=0,psum=0;j<nis;j++) { 4104 PetscInt plen; 4105 const PetscInt *is_array_idxs; 4106 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4107 send_buffer_idxs_is[psum] = plen; 4108 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4109 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4110 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4111 psum += plen+1; /* indices + lenght */ 4112 } 4113 for (i=0;i<n_sends;i++) { 4114 ilengths_idxs_is[is_indices[i]] = psum; 4115 } 4116 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4117 } 4118 4119 buf_size_idxs = 0; 4120 buf_size_vals = 0; 4121 buf_size_idxs_is = 0; 4122 for (i=0;i<n_recvs;i++) { 4123 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4124 buf_size_vals += (PetscInt)olengths_vals[i]; 4125 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4126 } 4127 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4128 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4129 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4130 4131 /* get new tags for clean communications */ 4132 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4133 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4134 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4135 4136 /* allocate for requests */ 4137 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4138 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4139 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4140 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4141 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4142 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4143 4144 /* communications */ 4145 ptr_idxs = recv_buffer_idxs; 4146 ptr_vals = recv_buffer_vals; 4147 ptr_idxs_is = recv_buffer_idxs_is; 4148 for (i=0;i<n_recvs;i++) { 4149 source_dest = onodes[i]; 4150 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4151 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4152 ptr_idxs += olengths_idxs[i]; 4153 ptr_vals += olengths_vals[i]; 4154 if (nis) { 4155 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4156 ptr_idxs_is += olengths_idxs_is[i]; 4157 } 4158 } 4159 for (i=0;i<n_sends;i++) { 4160 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4161 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4162 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4163 if (nis) { 4164 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4165 } 4166 } 4167 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4168 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4169 4170 /* assemble new l2g map */ 4171 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4172 ptr_idxs = recv_buffer_idxs; 4173 new_local_rows = 0; 4174 for (i=0;i<n_recvs;i++) { 4175 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4176 ptr_idxs += olengths_idxs[i]; 4177 } 4178 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4179 ptr_idxs = recv_buffer_idxs; 4180 new_local_rows = 0; 4181 for (i=0;i<n_recvs;i++) { 4182 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4183 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4184 ptr_idxs += olengths_idxs[i]; 4185 } 4186 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4187 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4188 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4189 4190 /* infer new local matrix type from received local matrices type */ 4191 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4192 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4193 if (n_recvs) { 4194 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4195 ptr_idxs = recv_buffer_idxs; 4196 for (i=0;i<n_recvs;i++) { 4197 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4198 new_local_type_private = MATAIJ_PRIVATE; 4199 break; 4200 } 4201 ptr_idxs += olengths_idxs[i]; 4202 } 4203 switch (new_local_type_private) { 4204 case MATDENSE_PRIVATE: 4205 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4206 new_local_type = MATSEQAIJ; 4207 bs = 1; 4208 } else { /* if I receive only 1 dense matrix */ 4209 new_local_type = MATSEQDENSE; 4210 bs = 1; 4211 } 4212 break; 4213 case MATAIJ_PRIVATE: 4214 new_local_type = MATSEQAIJ; 4215 bs = 1; 4216 break; 4217 case MATBAIJ_PRIVATE: 4218 new_local_type = MATSEQBAIJ; 4219 break; 4220 case MATSBAIJ_PRIVATE: 4221 new_local_type = MATSEQSBAIJ; 4222 break; 4223 default: 4224 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4225 break; 4226 } 4227 } else { /* by default, new_local_type is seqdense */ 4228 new_local_type = MATSEQDENSE; 4229 bs = 1; 4230 } 4231 4232 /* create MATIS object if needed */ 4233 if (reuse == MAT_INITIAL_MATRIX) { 4234 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4235 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4236 } else { 4237 /* it also destroys the local matrices */ 4238 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4239 } 4240 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4241 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4242 4243 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4244 4245 /* Global to local map of received indices */ 4246 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4247 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4248 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4249 4250 /* restore attributes -> type of incoming data and its size */ 4251 buf_size_idxs = 0; 4252 for (i=0;i<n_recvs;i++) { 4253 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4254 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4255 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4256 } 4257 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4258 4259 /* set preallocation */ 4260 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4261 if (!newisdense) { 4262 PetscInt *new_local_nnz=0; 4263 4264 ptr_vals = recv_buffer_vals; 4265 ptr_idxs = recv_buffer_idxs_local; 4266 if (n_recvs) { 4267 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4268 } 4269 for (i=0;i<n_recvs;i++) { 4270 PetscInt j; 4271 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4272 for (j=0;j<*(ptr_idxs+1);j++) { 4273 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4274 } 4275 } else { 4276 /* TODO */ 4277 } 4278 ptr_idxs += olengths_idxs[i]; 4279 } 4280 if (new_local_nnz) { 4281 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4282 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4283 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4284 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4285 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4286 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4287 } else { 4288 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4289 } 4290 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4291 } else { 4292 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4293 } 4294 4295 /* set values */ 4296 ptr_vals = recv_buffer_vals; 4297 ptr_idxs = recv_buffer_idxs_local; 4298 for (i=0;i<n_recvs;i++) { 4299 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4300 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4301 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4302 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4303 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4304 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4305 } else { 4306 /* TODO */ 4307 } 4308 ptr_idxs += olengths_idxs[i]; 4309 ptr_vals += olengths_vals[i]; 4310 } 4311 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4312 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4313 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4314 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4315 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4316 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4317 4318 #if 0 4319 if (!restrict_comm) { /* check */ 4320 Vec lvec,rvec; 4321 PetscReal infty_error; 4322 4323 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4324 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4325 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4326 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4327 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4328 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4329 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4330 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4331 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4332 } 4333 #endif 4334 4335 /* assemble new additional is (if any) */ 4336 if (nis) { 4337 PetscInt **temp_idxs,*count_is,j,psum; 4338 4339 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4340 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4341 ptr_idxs = recv_buffer_idxs_is; 4342 psum = 0; 4343 for (i=0;i<n_recvs;i++) { 4344 for (j=0;j<nis;j++) { 4345 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4346 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4347 psum += plen; 4348 ptr_idxs += plen+1; /* shift pointer to received data */ 4349 } 4350 } 4351 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4352 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4353 for (i=1;i<nis;i++) { 4354 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4355 } 4356 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4357 ptr_idxs = recv_buffer_idxs_is; 4358 for (i=0;i<n_recvs;i++) { 4359 for (j=0;j<nis;j++) { 4360 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4361 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4362 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4363 ptr_idxs += plen+1; /* shift pointer to received data */ 4364 } 4365 } 4366 for (i=0;i<nis;i++) { 4367 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4368 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4369 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4370 } 4371 ierr = PetscFree(count_is);CHKERRQ(ierr); 4372 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4373 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4374 } 4375 /* free workspace */ 4376 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4377 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4378 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4379 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4380 if (isdense) { 4381 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4382 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4383 } else { 4384 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4385 } 4386 if (nis) { 4387 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4388 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4389 } 4390 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4391 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4392 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4393 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4394 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4395 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4396 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4397 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4398 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4399 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4400 ierr = PetscFree(onodes);CHKERRQ(ierr); 4401 if (nis) { 4402 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4403 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4404 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4405 } 4406 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4407 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4408 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4409 for (i=0;i<nis;i++) { 4410 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4411 } 4412 *mat_n = NULL; 4413 } 4414 PetscFunctionReturn(0); 4415 } 4416 4417 /* temporary hack into ksp private data structure */ 4418 #include <petsc/private/kspimpl.h> 4419 4420 #undef __FUNCT__ 4421 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4422 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4423 { 4424 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4425 PC_IS *pcis = (PC_IS*)pc->data; 4426 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4427 MatNullSpace CoarseNullSpace=NULL; 4428 ISLocalToGlobalMapping coarse_islg; 4429 IS coarse_is,*isarray; 4430 PetscInt i,im_active=-1,active_procs=-1; 4431 PetscInt nis,nisdofs,nisneu; 4432 PC pc_temp; 4433 PCType coarse_pc_type; 4434 KSPType coarse_ksp_type; 4435 PetscBool multilevel_requested,multilevel_allowed; 4436 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4437 Mat t_coarse_mat_is; 4438 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4439 PetscMPIInt all_procs; 4440 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4441 PetscBool compute_vecs = PETSC_FALSE; 4442 PetscScalar *array; 4443 PetscErrorCode ierr; 4444 4445 PetscFunctionBegin; 4446 /* Assign global numbering to coarse dofs */ 4447 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4448 PetscInt ocoarse_size; 4449 compute_vecs = PETSC_TRUE; 4450 ocoarse_size = pcbddc->coarse_size; 4451 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4452 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4453 /* see if we can avoid some work */ 4454 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4455 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4456 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4457 PC pc; 4458 PetscBool isbddc; 4459 4460 /* temporary workaround since PCBDDC does not have a reset method so far */ 4461 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4462 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4463 if (isbddc) { 4464 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4465 } 4466 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4467 coarse_reuse = PETSC_FALSE; 4468 } else { /* we can safely reuse already computed coarse matrix */ 4469 coarse_reuse = PETSC_TRUE; 4470 } 4471 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4472 coarse_reuse = PETSC_FALSE; 4473 } 4474 /* reset any subassembling information */ 4475 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4476 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4477 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4478 coarse_reuse = PETSC_TRUE; 4479 } 4480 4481 /* count "active" (i.e. with positive local size) and "void" processes */ 4482 im_active = !!(pcis->n); 4483 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4484 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4485 void_procs = all_procs-active_procs; 4486 csin_type_simple = PETSC_TRUE; 4487 redist = PETSC_FALSE; 4488 if (pcbddc->current_level && void_procs) { 4489 csin_ml = PETSC_TRUE; 4490 ncoarse_ml = void_procs; 4491 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4492 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4493 csin_ds = PETSC_TRUE; 4494 ncoarse_ds = pcbddc->redistribute_coarse; 4495 redist = PETSC_TRUE; 4496 } else { 4497 csin_ds = PETSC_TRUE; 4498 ncoarse_ds = active_procs; 4499 redist = PETSC_TRUE; 4500 } 4501 } else { 4502 csin_ml = PETSC_FALSE; 4503 ncoarse_ml = all_procs; 4504 if (void_procs) { 4505 csin_ds = PETSC_TRUE; 4506 ncoarse_ds = void_procs; 4507 csin_type_simple = PETSC_FALSE; 4508 } else { 4509 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4510 csin_ds = PETSC_TRUE; 4511 ncoarse_ds = pcbddc->redistribute_coarse; 4512 redist = PETSC_TRUE; 4513 } else { 4514 csin_ds = PETSC_FALSE; 4515 ncoarse_ds = all_procs; 4516 } 4517 } 4518 } 4519 4520 /* 4521 test if we can go multilevel: three conditions must be satisfied: 4522 - we have not exceeded the number of levels requested 4523 - we can actually subassemble the active processes 4524 - we can find a suitable number of MPI processes where we can place the subassembled problem 4525 */ 4526 multilevel_allowed = PETSC_FALSE; 4527 multilevel_requested = PETSC_FALSE; 4528 if (pcbddc->current_level < pcbddc->max_levels) { 4529 multilevel_requested = PETSC_TRUE; 4530 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4531 multilevel_allowed = PETSC_FALSE; 4532 } else { 4533 multilevel_allowed = PETSC_TRUE; 4534 } 4535 } 4536 /* determine number of process partecipating to coarse solver */ 4537 if (multilevel_allowed) { 4538 ncoarse = ncoarse_ml; 4539 csin = csin_ml; 4540 redist = PETSC_FALSE; 4541 } else { 4542 ncoarse = ncoarse_ds; 4543 csin = csin_ds; 4544 } 4545 4546 /* creates temporary l2gmap and IS for coarse indexes */ 4547 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4548 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4549 4550 /* creates temporary MATIS object for coarse matrix */ 4551 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4552 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4553 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4554 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4555 #if 0 4556 { 4557 PetscViewer viewer; 4558 char filename[256]; 4559 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4560 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4561 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4562 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4563 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4564 } 4565 #endif 4566 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4567 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4568 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4569 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4570 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4571 4572 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4573 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4574 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4575 const PetscInt *idxs; 4576 ISLocalToGlobalMapping tmap; 4577 4578 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4579 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4580 /* allocate space for temporary storage */ 4581 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4582 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4583 /* allocate for IS array */ 4584 nisdofs = pcbddc->n_ISForDofsLocal; 4585 nisneu = !!pcbddc->NeumannBoundariesLocal; 4586 nis = nisdofs + nisneu; 4587 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4588 /* dofs splitting */ 4589 for (i=0;i<nisdofs;i++) { 4590 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4591 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4592 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4593 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4594 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4595 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4596 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4597 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4598 } 4599 /* neumann boundaries */ 4600 if (pcbddc->NeumannBoundariesLocal) { 4601 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4602 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4603 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4604 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4605 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4606 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4607 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4608 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4609 } 4610 /* free memory */ 4611 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4612 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4613 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4614 } else { 4615 nis = 0; 4616 nisdofs = 0; 4617 nisneu = 0; 4618 isarray = NULL; 4619 } 4620 /* destroy no longer needed map */ 4621 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4622 4623 /* restrict on coarse candidates (if needed) */ 4624 coarse_mat_is = NULL; 4625 if (csin) { 4626 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4627 if (redist) { 4628 PetscMPIInt rank; 4629 PetscInt spc,n_spc_p1,dest[1],destsize; 4630 4631 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4632 spc = active_procs/ncoarse; 4633 n_spc_p1 = active_procs%ncoarse; 4634 if (im_active) { 4635 destsize = 1; 4636 if (rank > n_spc_p1*(spc+1)-1) { 4637 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4638 } else { 4639 dest[0] = rank/(spc+1); 4640 } 4641 } else { 4642 destsize = 0; 4643 } 4644 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4645 } else if (csin_type_simple) { 4646 PetscMPIInt rank; 4647 PetscInt issize,isidx; 4648 4649 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4650 if (im_active) { 4651 issize = 1; 4652 isidx = (PetscInt)rank; 4653 } else { 4654 issize = 0; 4655 isidx = -1; 4656 } 4657 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4658 } else { /* get a suitable subassembling pattern from MATIS code */ 4659 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4660 } 4661 4662 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4663 if (!redist || ncoarse <= void_procs) { 4664 PetscInt ncoarse_cand,tissize,*nisindices; 4665 PetscInt *coarse_candidates; 4666 const PetscInt* tisindices; 4667 4668 /* get coarse candidates' ranks in pc communicator */ 4669 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4670 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4671 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4672 if (!coarse_candidates[i]) { 4673 coarse_candidates[ncoarse_cand++]=i; 4674 } 4675 } 4676 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4677 4678 4679 if (pcbddc->dbg_flag) { 4680 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4681 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4682 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4683 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4684 for (i=0;i<ncoarse_cand;i++) { 4685 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4686 } 4687 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4689 } 4690 /* shift the pattern on coarse candidates */ 4691 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4692 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4693 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4694 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4695 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4696 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4697 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4698 } 4699 if (pcbddc->dbg_flag) { 4700 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4701 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4702 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4704 } 4705 } 4706 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4707 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4708 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4709 } else { /* this is the last level, so use just receiving processes in subcomm */ 4710 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4711 } 4712 } else { 4713 if (pcbddc->dbg_flag) { 4714 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4715 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4716 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4717 } 4718 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4719 coarse_mat_is = t_coarse_mat_is; 4720 } 4721 4722 /* create local to global scatters for coarse problem */ 4723 if (compute_vecs) { 4724 PetscInt lrows; 4725 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4726 if (coarse_mat_is) { 4727 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4728 } else { 4729 lrows = 0; 4730 } 4731 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4732 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4733 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4734 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4735 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4736 } 4737 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4738 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4739 4740 /* set defaults for coarse KSP and PC */ 4741 if (multilevel_allowed) { 4742 coarse_ksp_type = KSPRICHARDSON; 4743 coarse_pc_type = PCBDDC; 4744 } else { 4745 coarse_ksp_type = KSPPREONLY; 4746 coarse_pc_type = PCREDUNDANT; 4747 } 4748 4749 /* print some info if requested */ 4750 if (pcbddc->dbg_flag) { 4751 if (!multilevel_allowed) { 4752 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4753 if (multilevel_requested) { 4754 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4755 } else if (pcbddc->max_levels) { 4756 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4757 } 4758 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4759 } 4760 } 4761 4762 /* create the coarse KSP object only once with defaults */ 4763 if (coarse_mat_is) { 4764 MatReuse coarse_mat_reuse; 4765 PetscViewer dbg_viewer = NULL; 4766 if (pcbddc->dbg_flag) { 4767 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4768 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4769 } 4770 if (!pcbddc->coarse_ksp) { 4771 char prefix[256],str_level[16]; 4772 size_t len; 4773 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4774 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4775 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4776 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4777 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4778 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4779 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4780 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4781 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4782 /* prefix */ 4783 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4784 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4785 if (!pcbddc->current_level) { 4786 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4787 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4788 } else { 4789 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4790 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4791 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4792 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4793 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4794 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4795 } 4796 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4797 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4798 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4799 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4800 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4801 /* allow user customization */ 4802 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4803 } 4804 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4805 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4806 if (nisdofs) { 4807 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4808 for (i=0;i<nisdofs;i++) { 4809 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4810 } 4811 } 4812 if (nisneu) { 4813 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4814 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4815 } 4816 4817 /* get some info after set from options */ 4818 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4819 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4820 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4821 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4822 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4823 isbddc = PETSC_FALSE; 4824 } 4825 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4826 if (isredundant) { 4827 KSP inner_ksp; 4828 PC inner_pc; 4829 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4830 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4831 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4832 } 4833 4834 /* assemble coarse matrix */ 4835 if (coarse_reuse) { 4836 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4837 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4838 coarse_mat_reuse = MAT_REUSE_MATRIX; 4839 } else { 4840 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4841 } 4842 if (isbddc || isnn) { 4843 if (pcbddc->coarsening_ratio > 1) { 4844 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4845 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4846 if (pcbddc->dbg_flag) { 4847 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4848 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4849 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4850 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4851 } 4852 } 4853 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4854 } else { 4855 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4856 coarse_mat = coarse_mat_is; 4857 } 4858 } else { 4859 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4860 } 4861 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4862 4863 /* propagate symmetry info of coarse matrix */ 4864 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4865 if (pc->pmat->symmetric_set) { 4866 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4867 } 4868 if (pc->pmat->hermitian_set) { 4869 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4870 } 4871 if (pc->pmat->spd_set) { 4872 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4873 } 4874 /* set operators */ 4875 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4876 if (pcbddc->dbg_flag) { 4877 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4878 } 4879 } else { /* processes non partecipating to coarse solver (if any) */ 4880 coarse_mat = 0; 4881 } 4882 ierr = PetscFree(isarray);CHKERRQ(ierr); 4883 #if 0 4884 { 4885 PetscViewer viewer; 4886 char filename[256]; 4887 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4888 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4889 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4890 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4891 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4892 } 4893 #endif 4894 4895 /* Compute coarse null space (special handling by BDDC only) */ 4896 #if 0 4897 if (pcbddc->NullSpace) { 4898 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4899 } 4900 #endif 4901 /* hack */ 4902 if (pcbddc->coarse_ksp) { 4903 Vec crhs,csol; 4904 4905 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4906 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4907 if (!csol) { 4908 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4909 } 4910 if (!crhs) { 4911 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4912 } 4913 } 4914 4915 /* compute null space for coarse solver if the benign trick has been requested */ 4916 if (pcbddc->benign_null) { 4917 4918 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 4919 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-1,1.0,INSERT_VALUES);CHKERRQ(ierr); 4920 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 4921 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 4922 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4923 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4924 if (coarse_mat) { 4925 Vec nullv; 4926 PetscScalar *array,*array2; 4927 PetscInt nl; 4928 4929 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 4930 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 4931 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 4932 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 4933 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 4934 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 4935 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 4936 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 4937 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 4938 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 4939 } 4940 } 4941 4942 if (pcbddc->coarse_ksp) { 4943 PetscBool ispreonly; 4944 4945 if (CoarseNullSpace) { 4946 PetscBool isnull; 4947 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 4948 if (isnull) { 4949 if (isbddc) { 4950 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4951 } else { 4952 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4953 } 4954 } else { 4955 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4956 } 4957 } 4958 /* setup coarse ksp */ 4959 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4960 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4961 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4962 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4963 KSP check_ksp; 4964 KSPType check_ksp_type; 4965 PC check_pc; 4966 Vec check_vec,coarse_vec; 4967 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4968 PetscInt its; 4969 PetscBool compute_eigs; 4970 PetscReal *eigs_r,*eigs_c; 4971 PetscInt neigs; 4972 const char *prefix; 4973 4974 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4975 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4976 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4977 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4978 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4979 if (ispreonly) { 4980 check_ksp_type = KSPPREONLY; 4981 compute_eigs = PETSC_FALSE; 4982 } else { 4983 check_ksp_type = KSPGMRES; 4984 compute_eigs = PETSC_TRUE; 4985 } 4986 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4987 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4988 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4989 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4990 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4991 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4992 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4993 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4994 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4995 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4996 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4997 /* create random vec */ 4998 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4999 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5000 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5001 if (CoarseNullSpace) { 5002 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5003 } 5004 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5005 /* solve coarse problem */ 5006 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5007 if (CoarseNullSpace) { 5008 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5009 } 5010 /* set eigenvalue estimation if preonly has not been requested */ 5011 if (compute_eigs) { 5012 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5013 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5014 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5015 lambda_max = eigs_r[neigs-1]; 5016 lambda_min = eigs_r[0]; 5017 if (pcbddc->use_coarse_estimates) { 5018 if (lambda_max>lambda_min) { 5019 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5020 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5021 } 5022 } 5023 } 5024 5025 /* check coarse problem residual error */ 5026 if (pcbddc->dbg_flag) { 5027 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5028 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5029 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5030 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5031 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5032 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5033 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5034 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5035 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5036 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5037 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5038 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5039 if (CoarseNullSpace) { 5040 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5041 } 5042 if (compute_eigs) { 5043 PetscReal lambda_max_s,lambda_min_s; 5044 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5045 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5046 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5047 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5048 for (i=0;i<neigs;i++) { 5049 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5050 } 5051 } 5052 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5053 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5054 } 5055 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5056 if (compute_eigs) { 5057 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5058 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5059 } 5060 } 5061 } 5062 /* print additional info */ 5063 if (pcbddc->dbg_flag) { 5064 /* waits until all processes reaches this point */ 5065 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5067 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5068 } 5069 5070 /* free memory */ 5071 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5072 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5073 PetscFunctionReturn(0); 5074 } 5075 5076 #undef __FUNCT__ 5077 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5078 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5079 { 5080 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5081 PC_IS* pcis = (PC_IS*)pc->data; 5082 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5083 IS subset,subset_mult,subset_n; 5084 PetscInt local_size,coarse_size=0; 5085 PetscInt *local_primal_indices=NULL; 5086 const PetscInt *t_local_primal_indices; 5087 PetscErrorCode ierr; 5088 5089 PetscFunctionBegin; 5090 /* Compute global number of coarse dofs */ 5091 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5092 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5093 } 5094 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5095 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5096 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5097 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5098 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5099 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5100 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5101 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5102 if (local_size != pcbddc->local_primal_size) { 5103 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5104 } 5105 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5106 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5107 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5108 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5109 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5110 5111 /* check numbering */ 5112 if (pcbddc->dbg_flag) { 5113 PetscScalar coarsesum,*array,*array2; 5114 PetscInt i; 5115 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5116 5117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5120 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5121 /* counter */ 5122 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5123 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5124 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5125 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5126 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5127 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5128 5129 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5130 for (i=0;i<pcbddc->local_primal_size;i++) { 5131 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5132 } 5133 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5134 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5135 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5136 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5137 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5138 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5139 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5140 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5141 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5142 for (i=0;i<pcis->n;i++) { 5143 if (array[i] != 0.0 && array[i] != array2[i]) { 5144 PetscInt owned = (PetscInt)(array[i]); 5145 PetscInt neigh = (PetscInt)(array2[i]); 5146 set_error = PETSC_TRUE; 5147 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5148 } 5149 } 5150 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5151 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5153 for (i=0;i<pcis->n;i++) { 5154 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5155 } 5156 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5157 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5158 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5159 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5160 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5162 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5163 PetscInt *gidxs; 5164 5165 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5166 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5167 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5169 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5170 for (i=0;i<pcbddc->local_primal_size;i++) { 5171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5172 } 5173 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5174 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5175 } 5176 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5177 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5178 } 5179 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5180 /* get back data */ 5181 *coarse_size_n = coarse_size; 5182 *local_primal_indices_n = local_primal_indices; 5183 PetscFunctionReturn(0); 5184 } 5185 5186 #undef __FUNCT__ 5187 #define __FUNCT__ "PCBDDCGlobalToLocal" 5188 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5189 { 5190 IS localis_t; 5191 PetscInt i,lsize,*idxs,n; 5192 PetscScalar *vals; 5193 PetscErrorCode ierr; 5194 5195 PetscFunctionBegin; 5196 /* get indices in local ordering exploiting local to global map */ 5197 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5198 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5199 for (i=0;i<lsize;i++) vals[i] = 1.0; 5200 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5201 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5202 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5203 if (idxs) { /* multilevel guard */ 5204 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5205 } 5206 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5207 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5208 ierr = PetscFree(vals);CHKERRQ(ierr); 5209 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5210 /* now compute set in local ordering */ 5211 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5212 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5213 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5214 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5215 for (i=0,lsize=0;i<n;i++) { 5216 if (PetscRealPart(vals[i]) > 0.5) { 5217 lsize++; 5218 } 5219 } 5220 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5221 for (i=0,lsize=0;i<n;i++) { 5222 if (PetscRealPart(vals[i]) > 0.5) { 5223 idxs[lsize++] = i; 5224 } 5225 } 5226 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5227 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5228 *localis = localis_t; 5229 PetscFunctionReturn(0); 5230 } 5231 5232 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5233 #undef __FUNCT__ 5234 #define __FUNCT__ "PCBDDCMatMult_Private" 5235 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5236 { 5237 PCBDDCChange_ctx change_ctx; 5238 PetscErrorCode ierr; 5239 5240 PetscFunctionBegin; 5241 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5242 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5243 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5244 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5245 PetscFunctionReturn(0); 5246 } 5247 5248 #undef __FUNCT__ 5249 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5250 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5251 { 5252 PCBDDCChange_ctx change_ctx; 5253 PetscErrorCode ierr; 5254 5255 PetscFunctionBegin; 5256 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5257 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5258 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5259 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5260 PetscFunctionReturn(0); 5261 } 5262 5263 #undef __FUNCT__ 5264 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5265 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5266 { 5267 PC_IS *pcis=(PC_IS*)pc->data; 5268 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5269 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5270 Mat S_j; 5271 PetscInt *used_xadj,*used_adjncy; 5272 PetscBool free_used_adj; 5273 PetscErrorCode ierr; 5274 5275 PetscFunctionBegin; 5276 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5277 free_used_adj = PETSC_FALSE; 5278 if (pcbddc->sub_schurs_layers == -1) { 5279 used_xadj = NULL; 5280 used_adjncy = NULL; 5281 } else { 5282 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5283 used_xadj = pcbddc->mat_graph->xadj; 5284 used_adjncy = pcbddc->mat_graph->adjncy; 5285 } else if (pcbddc->computed_rowadj) { 5286 used_xadj = pcbddc->mat_graph->xadj; 5287 used_adjncy = pcbddc->mat_graph->adjncy; 5288 } else { 5289 PetscBool flg_row=PETSC_FALSE; 5290 const PetscInt *xadj,*adjncy; 5291 PetscInt nvtxs; 5292 5293 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5294 if (flg_row) { 5295 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5296 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5297 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5298 free_used_adj = PETSC_TRUE; 5299 } else { 5300 pcbddc->sub_schurs_layers = -1; 5301 used_xadj = NULL; 5302 used_adjncy = NULL; 5303 } 5304 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5305 } 5306 } 5307 5308 /* setup sub_schurs data */ 5309 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5310 if (!sub_schurs->use_mumps) { 5311 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5312 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5313 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5314 } else { 5315 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5316 PetscBool isseqaij; 5317 if (!pcbddc->use_vertices && reuse_solvers) { 5318 PetscInt n_vertices; 5319 5320 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5321 reuse_solvers = (PetscBool)!n_vertices; 5322 } 5323 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5324 if (!isseqaij) { 5325 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5326 if (matis->A == pcbddc->local_mat) { 5327 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5328 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5329 } else { 5330 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5331 } 5332 } 5333 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5334 } 5335 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5336 5337 /* free adjacency */ 5338 if (free_used_adj) { 5339 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5340 } 5341 PetscFunctionReturn(0); 5342 } 5343 5344 #undef __FUNCT__ 5345 #define __FUNCT__ "PCBDDCInitSubSchurs" 5346 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5347 { 5348 PC_IS *pcis=(PC_IS*)pc->data; 5349 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5350 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5351 PCBDDCGraph graph; 5352 PetscErrorCode ierr; 5353 5354 PetscFunctionBegin; 5355 /* attach interface graph for determining subsets */ 5356 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5357 IS verticesIS,verticescomm; 5358 PetscInt vsize,*idxs; 5359 5360 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5361 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5362 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5363 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5364 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5365 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5366 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5367 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5368 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5369 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5370 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5371 /* 5372 if (pcbddc->dbg_flag) { 5373 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5374 } 5375 */ 5376 } else { 5377 graph = pcbddc->mat_graph; 5378 } 5379 5380 /* sub_schurs init */ 5381 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5382 5383 /* free graph struct */ 5384 if (pcbddc->sub_schurs_rebuild) { 5385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5386 } 5387 PetscFunctionReturn(0); 5388 } 5389 5390 #undef __FUNCT__ 5391 #define __FUNCT__ "PCBDDCCheckOperator" 5392 PetscErrorCode PCBDDCCheckOperator(PC pc) 5393 { 5394 PC_IS *pcis=(PC_IS*)pc->data; 5395 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5396 PetscErrorCode ierr; 5397 5398 PetscFunctionBegin; 5399 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5400 IS zerodiag = NULL; 5401 Mat S_j,B0=NULL,B0_B=NULL; 5402 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5403 PetscScalar norm,p0_check,*array,*array2; 5404 PetscInt i; 5405 5406 /* B0 and B0_B */ 5407 if (zerodiag) { 5408 IS dummy; 5409 PetscInt ii[2]; 5410 5411 ii[0] = 0; 5412 ii[1] = pcbddc->B0_ncol; 5413 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 5414 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 5415 ierr = MatGetSubMatrix(B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5416 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5417 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5418 } 5419 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5420 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5421 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5422 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5423 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5424 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5425 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5426 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5427 /* S_j */ 5428 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5429 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5430 5431 /* mimic vector in \widetilde{W}_\Gamma */ 5432 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5433 /* continuous in primal space */ 5434 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5435 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5436 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5437 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5438 if (zerodiag) { 5439 p0_check = array[pcbddc->local_primal_size-1]; 5440 } else { 5441 p0_check = 0; 5442 } 5443 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5444 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5447 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5448 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5449 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5450 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5451 5452 /* assemble rhs for coarse problem */ 5453 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5454 /* local with Schur */ 5455 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5456 if (zerodiag) { 5457 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5458 array[0] = p0_check; 5459 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5460 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5461 } 5462 /* sum on primal nodes the local contributions */ 5463 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5464 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5465 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5466 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5467 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5468 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5469 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5470 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5471 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5472 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5473 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5474 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5475 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5476 /* scale primal nodes (BDDC sums contibutions) */ 5477 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5478 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5479 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5480 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5481 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5482 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5483 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5484 /* global: \widetilde{B0}_B w_\Gamma */ 5485 if (zerodiag) { 5486 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5487 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5488 pcbddc->benign_p0 = array[0]; 5489 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5490 } else { 5491 pcbddc->benign_p0 = 0.; 5492 } 5493 /* BDDC */ 5494 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5495 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5496 5497 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5498 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5499 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5500 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5501 if (pcbddc->benign_p0_lidx >= 0) { 5502 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0 error is %1.4e\n",PetscGlobalRank,PetscAbsScalar(pcbddc->benign_p0-p0_check)); 5503 } 5504 5505 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5506 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5507 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5508 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5509 ierr = MatDestroy(&B0);CHKERRQ(ierr); 5510 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5511 } 5512 PetscFunctionReturn(0); 5513 } 5514