1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "PCBDDCBenignCheck" 10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 11 { 12 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 13 PC_IS* pcis = (PC_IS*)(pc->data); 14 PetscErrorCode ierr; 15 16 PetscFunctionBegin; 17 if (zerodiag) { 18 Mat A; 19 Vec vec3_N; 20 IS dirIS = NULL; 21 PetscScalar *vals; 22 const PetscInt *idxs; 23 PetscInt i,nz; 24 25 /* p0 */ 26 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 27 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 28 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 29 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 30 for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */ 31 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 32 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 33 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 34 /* v_I */ 35 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 36 for (i=0;i<nz;i++) vals[i] = 0.; 37 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 38 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 39 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 40 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 41 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 42 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 43 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 44 if (dirIS) { 45 PetscInt n; 46 47 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 48 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 49 for (i=0;i<n;i++) vals[i] = 0.; 50 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 51 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 52 } 53 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 54 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 55 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 56 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 57 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 58 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 59 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 60 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 61 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */ 62 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 63 } 64 ierr = PetscFree(vals);CHKERRQ(ierr); 65 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 66 } 67 68 /* check PCBDDCBenignGetOrSetP0 */ 69 if (pcbddc->benign_saddle_point) { 70 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 71 pcbddc->benign_p0 = -PetscGlobalRank; 72 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 73 pcbddc->benign_p0 = 1; 74 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 75 if (pcbddc->benign_p0_gidx >=0 && pcbddc->benign_p0 != -PetscGlobalRank) { 76 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr); 77 } 78 } 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 84 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 85 { 86 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 87 IS zerodiag; 88 PetscInt nz; 89 PetscBool sorted; 90 PetscErrorCode ierr; 91 92 PetscFunctionBegin; 93 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 94 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 95 pcbddc->benign_original_mat = pcbddc->local_mat; 96 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 97 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 98 if (!sorted) { 99 ierr = ISSort(zerodiag);CHKERRQ(ierr); 100 } 101 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 102 if (nz) { 103 IS zerodiagc; 104 PetscScalar *array; 105 const PetscInt *idxs,*idxsc; 106 PetscInt i,n,*nnz; 107 108 /* TODO: add check for shared dofs and raise error */ 109 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 110 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 111 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 112 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 113 /* local change of basis for pressures */ 114 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 115 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 116 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 117 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 118 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 119 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */ 120 for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */ 121 nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */ 122 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 123 ierr = PetscFree(nnz);CHKERRQ(ierr); 124 /* set identity on velocities */ 125 for (i=0;i<n-nz;i++) { 126 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 127 } 128 /* set change on pressures */ 129 for (i=0;i<nz-1;i++) { 130 PetscScalar vals[2]; 131 PetscInt cols[2]; 132 133 /* TODO: add quadrature */ 134 cols[0] = idxs[i]; 135 cols[1] = idxs[nz-1]; 136 vals[0] = 1.; 137 vals[1] = 1./nz; 138 ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 139 } 140 ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr); 141 for (i=0;i<nz-1;i++) array[i] = -1.; 142 array[nz-1] = 1./nz; 143 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 144 ierr = PetscFree(array);CHKERRQ(ierr); 145 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 146 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 147 /* TODO: need optimization? */ 148 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 149 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 150 /* store local and global idxs for p0 */ 151 pcbddc->benign_p0_lidx = idxs[nz-1]; 152 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 153 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 154 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 155 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 156 } else { /* this is unlikely to happen but, just in case, destroy the empty IS */ 157 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 158 } 159 *zerodiaglocal = zerodiag; 160 PetscFunctionReturn(0); 161 } 162 163 #undef __FUNCT__ 164 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 165 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 166 { 167 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 168 PetscErrorCode ierr; 169 170 PetscFunctionBegin; 171 if (!pcbddc->benign_sf) { 172 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 173 if (pcbddc->benign_p0_gidx >= 0) { 174 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 175 } else { 176 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr); 177 } 178 } 179 if (get) { /* use SF to get values */ 180 PetscScalar *array; 181 182 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 183 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 184 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr); 185 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 186 } else { /* use VecSetValue */ 187 if (pcbddc->benign_p0_gidx >= 0) { 188 ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 189 } 190 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 191 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 198 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 199 { 200 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 201 PetscErrorCode ierr; 202 203 PetscFunctionBegin; 204 /* TODO: add error checking 205 - avoid nested pop (or push) calls. 206 - cannot push before pop. 207 - cannot call this if pcbddc->local_mat is NULL 208 */ 209 if (pcbddc->benign_p0_lidx < 0) { 210 PetscFunctionReturn(0); 211 } 212 if (pop) { 213 const PetscInt *cB0_cols; 214 PetscInt cB0_ncol; 215 const PetscScalar *cB0_vals; 216 217 /* extract B_0 */ 218 ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 219 pcbddc->B0_ncol = cB0_ncol; 220 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 221 ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr); 222 ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr); 223 ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr); 224 ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr); 225 /* remove rows and cols from local problem */ 226 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 227 ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr); 228 } else { /* push */ 229 ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 230 ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr); 231 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr); 232 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 233 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "PCBDDCAdaptiveSelection" 240 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 241 { 242 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 243 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 244 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 245 PetscBLASInt *B_iwork,*B_ifail; 246 PetscScalar *work,lwork; 247 PetscScalar *St,*S,*eigv; 248 PetscScalar *Sarray,*Starray; 249 PetscReal *eigs,thresh; 250 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 251 PetscBool allocated_S_St; 252 #if defined(PETSC_USE_COMPLEX) 253 PetscReal *rwork; 254 #endif 255 PetscErrorCode ierr; 256 257 PetscFunctionBegin; 258 if (!sub_schurs->use_mumps) { 259 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 260 } 261 262 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 263 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 264 } 265 266 if (pcbddc->dbg_flag) { 267 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 268 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 270 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 271 } 272 273 if (pcbddc->dbg_flag) { 274 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 275 } 276 277 /* max size of subsets */ 278 mss = 0; 279 for (i=0;i<sub_schurs->n_subs;i++) { 280 PetscInt subset_size; 281 282 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 283 mss = PetscMax(mss,subset_size); 284 } 285 286 /* min/max and threshold */ 287 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 288 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 289 nmax = PetscMax(nmin,nmax); 290 allocated_S_St = PETSC_FALSE; 291 if (nmin) { 292 allocated_S_St = PETSC_TRUE; 293 } 294 295 /* allocate lapack workspace */ 296 cum = cum2 = 0; 297 maxneigs = 0; 298 for (i=0;i<sub_schurs->n_subs;i++) { 299 PetscInt n,subset_size; 300 301 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 302 n = PetscMin(subset_size,nmax); 303 cum += subset_size; 304 cum2 += subset_size*n; 305 maxneigs = PetscMax(maxneigs,n); 306 } 307 if (mss) { 308 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 309 PetscBLASInt B_itype = 1; 310 PetscBLASInt B_N = mss; 311 PetscReal zero = 0.0; 312 PetscReal eps = 0.0; /* dlamch? */ 313 314 B_lwork = -1; 315 S = NULL; 316 St = NULL; 317 eigs = NULL; 318 eigv = NULL; 319 B_iwork = NULL; 320 B_ifail = NULL; 321 #if defined(PETSC_USE_COMPLEX) 322 rwork = NULL; 323 #endif 324 thresh = 1.0; 325 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 326 #if defined(PETSC_USE_COMPLEX) 327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 328 #else 329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 330 #endif 331 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 332 ierr = PetscFPTrapPop();CHKERRQ(ierr); 333 } else { 334 /* TODO */ 335 } 336 } else { 337 lwork = 0; 338 } 339 340 nv = 0; 341 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 342 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 343 } 344 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 345 if (allocated_S_St) { 346 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 347 } 348 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 349 #if defined(PETSC_USE_COMPLEX) 350 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 351 #endif 352 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 353 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 354 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 355 nv+cum,&pcbddc->adaptive_constraints_idxs, 356 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 357 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 358 359 maxneigs = 0; 360 cum = cum2 = cumarray = 0; 361 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 362 pcbddc->adaptive_constraints_data_ptr[0] = 0; 363 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 364 const PetscInt *idxs; 365 366 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 367 for (cum=0;cum<nv;cum++) { 368 pcbddc->adaptive_constraints_n[cum] = 1; 369 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 370 pcbddc->adaptive_constraints_data[cum] = 1.0; 371 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 372 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 373 } 374 cum2 = cum; 375 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 376 } 377 378 if (mss) { /* multilevel */ 379 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 380 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 381 } 382 383 for (i=0;i<sub_schurs->n_subs;i++) { 384 385 const PetscInt *idxs; 386 PetscReal infty = PETSC_MAX_REAL; 387 PetscInt j,subset_size,eigs_start = 0; 388 PetscBLASInt B_N; 389 PetscBool same_data = PETSC_FALSE; 390 391 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 392 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 393 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 394 if (sub_schurs->is_hermitian) { 395 PetscInt j,k; 396 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 397 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 398 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 399 } 400 for (j=0;j<subset_size;j++) { 401 for (k=j;k<subset_size;k++) { 402 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 403 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 404 } 405 } 406 } else { 407 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 408 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 409 } 410 } else { 411 S = Sarray + cumarray; 412 St = Starray + cumarray; 413 } 414 415 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 416 /* see if we can save some work */ 417 if (sub_schurs->n_subs == 1) { 418 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 419 } 420 421 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 422 B_neigs = 0; 423 } else { 424 /* Threshold: this is an heuristic for edges */ 425 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 426 427 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 428 PetscBLASInt B_itype = 1; 429 PetscBLASInt B_IL, B_IU; 430 PetscReal eps = -1.0; /* dlamch? */ 431 PetscInt nmin_s; 432 433 if (pcbddc->dbg_flag) { 434 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 435 } 436 437 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 438 if (thresh > 1.+PETSC_SMALL) { 439 440 /* ask for eigenvalues larger than thresh */ 441 #if defined(PETSC_USE_COMPLEX) 442 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 443 #else 444 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 445 #endif 446 } else { 447 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 448 B_IL = 1; 449 #if defined(PETSC_USE_COMPLEX) 450 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 451 #else 452 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 453 #endif 454 } 455 ierr = PetscFPTrapPop();CHKERRQ(ierr); 456 if (B_ierr) { 457 if (B_ierr < 0 ) { 458 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 459 } else if (B_ierr <= B_N) { 460 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 461 } else { 462 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 463 } 464 } 465 466 if (B_neigs > nmax) { 467 if (pcbddc->dbg_flag) { 468 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 469 } 470 eigs_start = B_neigs -nmax; 471 B_neigs = nmax; 472 } 473 474 nmin_s = PetscMin(nmin,B_N); 475 if (B_neigs < nmin_s) { 476 PetscBLASInt B_neigs2; 477 478 B_IU = B_N - B_neigs; 479 B_IL = B_N - nmin_s + 1; 480 if (pcbddc->dbg_flag) { 481 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 482 } 483 if (sub_schurs->is_hermitian) { 484 PetscInt j; 485 for (j=0;j<subset_size;j++) { 486 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 487 } 488 for (j=0;j<subset_size;j++) { 489 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 490 } 491 } else { 492 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 493 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 494 } 495 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 496 #if defined(PETSC_USE_COMPLEX) 497 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 498 #else 499 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 500 #endif 501 ierr = PetscFPTrapPop();CHKERRQ(ierr); 502 B_neigs += B_neigs2; 503 } 504 if (B_ierr) { 505 if (B_ierr < 0 ) { 506 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 507 } else if (B_ierr <= B_N) { 508 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 509 } else { 510 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 511 } 512 } 513 if (pcbddc->dbg_flag) { 514 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 515 for (j=0;j<B_neigs;j++) { 516 if (eigs[j] == 0.0) { 517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 518 } else { 519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 520 } 521 } 522 } 523 } else { 524 /* TODO */ 525 } 526 } 527 maxneigs = PetscMax(B_neigs,maxneigs); 528 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 529 if (B_neigs) { 530 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 531 532 if (pcbddc->dbg_flag > 1) { 533 PetscInt ii; 534 for (ii=0;ii<B_neigs;ii++) { 535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 536 for (j=0;j<B_N;j++) { 537 #if defined(PETSC_USE_COMPLEX) 538 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 539 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 541 #else 542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 543 #endif 544 } 545 } 546 } 547 #if 0 548 for (j=0;j<B_neigs;j++) { 549 PetscBLASInt Blas_N,Blas_one = 1.0; 550 PetscScalar norm; 551 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 552 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 553 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 554 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 555 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 556 } else { 557 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 558 } 559 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 560 } 561 #endif 562 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 563 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 564 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 565 cum++; 566 } 567 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 568 /* shift for next computation */ 569 cumarray += subset_size*subset_size; 570 } 571 if (pcbddc->dbg_flag) { 572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 573 } 574 575 if (mss) { 576 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 577 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 578 /* destroy matrices (junk) */ 579 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 580 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 581 } 582 if (allocated_S_St) { 583 ierr = PetscFree2(S,St);CHKERRQ(ierr); 584 } 585 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 586 #if defined(PETSC_USE_COMPLEX) 587 ierr = PetscFree(rwork);CHKERRQ(ierr); 588 #endif 589 if (pcbddc->dbg_flag) { 590 PetscInt maxneigs_r; 591 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 592 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 593 } 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "PCBDDCSetUpSolvers" 599 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 600 { 601 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 602 PetscScalar *coarse_submat_vals; 603 PetscErrorCode ierr; 604 605 PetscFunctionBegin; 606 /* Setup local scatters R_to_B and (optionally) R_to_D */ 607 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 608 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 609 610 /* Setup local neumann solver ksp_R */ 611 /* PCBDDCSetUpLocalScatters should be called first! */ 612 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 613 614 /* Change global null space passed in by the user if change of basis has been requested */ 615 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 616 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 617 } 618 619 /* 620 Setup local correction and local part of coarse basis. 621 Gives back the dense local part of the coarse matrix in column major ordering 622 */ 623 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 624 625 /* Compute total number of coarse nodes and setup coarse solver */ 626 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 627 628 /* free */ 629 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "PCBDDCResetCustomization" 635 PetscErrorCode PCBDDCResetCustomization(PC pc) 636 { 637 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 638 PetscErrorCode ierr; 639 640 PetscFunctionBegin; 641 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 642 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 643 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 644 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 645 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 646 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 647 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 648 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 649 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 650 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 651 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 #undef __FUNCT__ 656 #define __FUNCT__ "PCBDDCResetTopography" 657 PetscErrorCode PCBDDCResetTopography(PC pc) 658 { 659 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 660 PetscErrorCode ierr; 661 662 PetscFunctionBegin; 663 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 664 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 665 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 666 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 667 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNCT__ 672 #define __FUNCT__ "PCBDDCResetSolvers" 673 PetscErrorCode PCBDDCResetSolvers(PC pc) 674 { 675 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 676 PetscScalar *array; 677 PetscErrorCode ierr; 678 679 PetscFunctionBegin; 680 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 681 if (pcbddc->coarse_phi_B) { 682 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 683 ierr = PetscFree(array);CHKERRQ(ierr); 684 } 685 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 686 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 687 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 688 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 689 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 690 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 691 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 692 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 693 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 694 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 695 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 696 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 697 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 698 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 699 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 700 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 701 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 702 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 703 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 704 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 705 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 706 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 707 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 708 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 709 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 710 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 711 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 712 ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 718 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 719 { 720 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 721 PC_IS *pcis = (PC_IS*)pc->data; 722 VecType impVecType; 723 PetscInt n_constraints,n_R,old_size,n_benign; 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 if (!pcbddc->ConstraintMatrix) { 728 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 729 } 730 /* get sizes */ 731 n_benign = 0; 732 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 733 n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices; 734 n_R = pcis->n - pcbddc->n_vertices; 735 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 736 /* local work vectors (try to avoid unneeded work)*/ 737 /* R nodes */ 738 old_size = -1; 739 if (pcbddc->vec1_R) { 740 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 741 } 742 if (n_R != old_size) { 743 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 744 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 745 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 746 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 747 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 748 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 749 } 750 /* local primal dofs */ 751 old_size = -1; 752 if (pcbddc->vec1_P) { 753 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 754 } 755 if (pcbddc->local_primal_size != old_size) { 756 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 757 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 758 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 759 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 760 } 761 /* local explicit constraints */ 762 old_size = -1; 763 if (pcbddc->vec1_C) { 764 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 765 } 766 if (n_constraints && n_constraints != old_size) { 767 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 768 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 769 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 770 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 771 } 772 PetscFunctionReturn(0); 773 } 774 775 #undef __FUNCT__ 776 #define __FUNCT__ "PCBDDCSetUpCorrection" 777 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 778 { 779 PetscErrorCode ierr; 780 /* pointers to pcis and pcbddc */ 781 PC_IS* pcis = (PC_IS*)pc->data; 782 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 783 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 784 /* submatrices of local problem */ 785 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 786 /* submatrices of benign trick */ 787 Mat B0_V = NULL; 788 /* submatrices of local coarse problem */ 789 Mat S_VV,S_CV,S_VC,S_CC; 790 /* working matrices */ 791 Mat C_CR; 792 /* additional working stuff */ 793 PC pc_R; 794 Mat F,B0 = NULL; 795 PetscBool isLU,isCHOL,isILU; 796 797 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 798 PetscScalar *work; 799 PetscInt *idx_V_B; 800 PetscInt n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0; 801 PetscInt i,n_R,n_D,n_B; 802 PetscBool unsymmetric_check; 803 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 804 MatType impMatType; 805 /* some shortcuts to scalars */ 806 PetscScalar one=1.0,m_one=-1.0; 807 808 PetscFunctionBegin; 809 n_benign = 0; 810 if (pcbddc->benign_p0_lidx >= 0) n_benign = 1; 811 n_vertices = pcbddc->n_vertices; 812 n_constraints = pcbddc->local_primal_size - n_benign - n_vertices; 813 /* Set Non-overlapping dimensions */ 814 n_B = pcis->n_B; 815 n_D = pcis->n - n_B; 816 n_R = pcis->n - n_vertices; 817 818 /* Set types for local objects needed by BDDC precondtioner */ 819 impMatType = MATSEQDENSE; 820 821 /* vertices in boundary numbering */ 822 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 823 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 824 if (i != n_vertices) { 825 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 826 } 827 828 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 829 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 830 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 831 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 832 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 833 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 834 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 835 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 836 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 837 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 838 839 unsymmetric_check = PETSC_FALSE; 840 /* allocate workspace */ 841 n = 0; 842 if (n_constraints) { 843 n += n_R*n_constraints; 844 } 845 if (n_vertices) { 846 n = PetscMax(2*n_R*n_vertices,n); 847 n = PetscMax((n_R+n_B)*n_vertices,n); 848 } 849 if (!pcbddc->symmetric_primal) { 850 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 851 unsymmetric_check = PETSC_TRUE; 852 } 853 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 854 855 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 856 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 857 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 858 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 859 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 860 if (isLU || isILU || isCHOL) { 861 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 862 } else if (sub_schurs->reuse_mumps) { 863 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 864 MatFactorType type; 865 866 F = reuse_mumps->F; 867 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 868 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 869 } else { 870 F = NULL; 871 } 872 873 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 874 if (n_constraints) { 875 Mat M1,M2,M3; 876 Mat auxmat; 877 IS is_aux; 878 PetscScalar *array,*array2; 879 880 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 881 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 882 883 /* Extract constraints on R nodes: C_{CR} */ 884 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 885 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 886 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 887 888 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 889 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 890 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 891 for (i=0;i<n_constraints;i++) { 892 const PetscScalar *row_cmat_values; 893 const PetscInt *row_cmat_indices; 894 PetscInt size_of_constraint,j; 895 896 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 897 for (j=0;j<size_of_constraint;j++) { 898 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 899 } 900 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 901 } 902 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 903 if (F) { 904 Mat B; 905 906 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 907 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 908 ierr = MatDestroy(&B);CHKERRQ(ierr); 909 } else { 910 PetscScalar *marr; 911 912 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 913 for (i=0;i<n_constraints;i++) { 914 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 915 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 916 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 917 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 918 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 919 } 920 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 921 } 922 if (!pcbddc->switch_static) { 923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 924 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 925 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 926 for (i=0;i<n_constraints;i++) { 927 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 928 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 929 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 932 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 933 } 934 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 935 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 936 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 937 } else { 938 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 939 pcbddc->local_auxmat2 = local_auxmat2_R; 940 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 941 } 942 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 943 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 944 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 945 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 946 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 947 if (isCHOL) { 948 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 949 } else { 950 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 951 } 952 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 953 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 954 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 955 ierr = MatDestroy(&M2);CHKERRQ(ierr); 956 ierr = MatDestroy(&M3);CHKERRQ(ierr); 957 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 958 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 959 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 960 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 961 ierr = MatDestroy(&M1);CHKERRQ(ierr); 962 } 963 /* Get submatrices from subdomain matrix */ 964 if (n_benign) { 965 IS dummy; 966 Mat B0_R; 967 PetscReal norm; 968 PetscInt ii[2]; 969 970 ii[0] = 0; 971 ii[1] = pcbddc->B0_ncol; 972 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr); 973 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 974 ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 975 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 976 if (norm > PETSC_SMALL) { 977 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 978 } 979 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 980 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 981 } 982 983 if (n_vertices) { 984 IS is_aux; 985 986 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 987 IS tis; 988 989 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 990 ierr = ISSort(tis);CHKERRQ(ierr); 991 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 992 ierr = ISDestroy(&tis);CHKERRQ(ierr); 993 } else { 994 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 995 } 996 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 997 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 998 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 999 if (n_benign) { 1000 IS dummy; 1001 1002 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr); 1003 ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1004 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1005 } 1006 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1007 } 1008 1009 /* Matrix of coarse basis functions (local) */ 1010 if (pcbddc->coarse_phi_B) { 1011 PetscInt on_B,on_primal,on_D=n_D; 1012 if (pcbddc->coarse_phi_D) { 1013 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1014 } 1015 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1016 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1017 PetscScalar *marray; 1018 1019 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1020 ierr = PetscFree(marray);CHKERRQ(ierr); 1021 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1022 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1023 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1024 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1025 } 1026 } 1027 1028 if (!pcbddc->coarse_phi_B) { 1029 PetscScalar *marray; 1030 1031 n = n_B*pcbddc->local_primal_size; 1032 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1033 n += n_D*pcbddc->local_primal_size; 1034 } 1035 if (!pcbddc->symmetric_primal) { 1036 n *= 2; 1037 } 1038 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1039 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1040 n = n_B*pcbddc->local_primal_size; 1041 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1042 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1043 n += n_D*pcbddc->local_primal_size; 1044 } 1045 if (!pcbddc->symmetric_primal) { 1046 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1047 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1048 n = n_B*pcbddc->local_primal_size; 1049 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1050 } 1051 } else { 1052 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1053 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1054 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1055 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1056 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1057 } 1058 } 1059 } 1060 1061 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1062 if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1063 const PetscInt *idxs; 1064 1065 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1066 ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr); 1067 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1068 } 1069 1070 /* vertices */ 1071 if (n_vertices) { 1072 1073 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1074 1075 if (n_R) { 1076 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1077 PetscBLASInt B_N,B_one = 1; 1078 PetscScalar *x,*y; 1079 PetscBool isseqaij; 1080 1081 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1082 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1083 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1084 if (F) { /* TODO could be optimized for symmetric problems */ 1085 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1086 } else { 1087 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1088 for (i=0;i<n_vertices;i++) { 1089 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1090 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1091 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1092 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1093 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1094 } 1095 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1096 } 1097 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1098 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1099 if (n_constraints) { 1100 Mat B; 1101 1102 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1103 for (i=0;i<n_vertices;i++) { 1104 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1105 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1106 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1107 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1109 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1110 } 1111 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1112 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1113 ierr = MatDestroy(&B);CHKERRQ(ierr); 1114 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1115 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1116 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1117 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1118 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1119 ierr = MatDestroy(&B);CHKERRQ(ierr); 1120 } 1121 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1122 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1123 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1124 } 1125 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1126 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1127 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1128 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1129 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1130 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1131 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1132 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1133 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1134 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1135 } else { 1136 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1137 } 1138 if (n_benign) { 1139 const PetscScalar *vals; 1140 const PetscInt *idxs; 1141 PetscInt n,j; 1142 1143 ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1144 for (j=0;j<n;j++) { 1145 coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1146 coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j]; 1147 } 1148 ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr); 1149 } 1150 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1151 1152 /* coarse basis functions */ 1153 for (i=0;i<n_vertices;i++) { 1154 PetscScalar *y; 1155 1156 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1157 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1158 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1160 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1161 y[n_B*i+idx_V_B[i]] = 1.0; 1162 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1163 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1164 1165 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1166 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1167 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1168 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1171 if (n_benign) y[n_D*i+p0_lidx_I] = 0.0; 1172 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1173 } 1174 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1175 } 1176 /* if n_R == 0 the object is not destroyed */ 1177 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1178 } 1179 1180 if (n_constraints) { 1181 Mat B; 1182 1183 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1184 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1185 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1186 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1187 if (n_vertices) { 1188 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1189 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1190 } else { 1191 Mat S_VCt; 1192 1193 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1194 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1195 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1196 } 1197 } 1198 ierr = MatDestroy(&B);CHKERRQ(ierr); 1199 /* coarse basis functions */ 1200 for (i=0;i<n_constraints;i++) { 1201 PetscScalar *y; 1202 1203 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1204 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1205 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1206 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1207 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1208 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1209 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1210 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1211 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1212 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1213 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1214 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1215 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1216 if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0; 1217 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1218 } 1219 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1220 } 1221 } 1222 if (n_constraints) { 1223 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1224 } 1225 1226 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1227 1228 /* compute other basis functions for non-symmetric problems */ 1229 if (!pcbddc->symmetric_primal) { 1230 1231 if (n_constraints) { 1232 Mat S_CCT,B_C; 1233 1234 /* this is a lazy thing */ 1235 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1236 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1237 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1238 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1239 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1240 if (n_vertices) { 1241 Mat B_V,S_VCT; 1242 1243 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1244 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1245 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1246 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1247 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1248 } 1249 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1250 } else { /* if there are no constraints, reset work */ 1251 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1252 } 1253 if (n_vertices && n_R) { 1254 Mat A_VRT; 1255 PetscScalar *marray; 1256 PetscBLASInt B_N,B_one = 1; 1257 1258 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1259 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1260 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1261 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1262 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1263 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1264 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1265 } 1266 1267 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1268 for (i=0;i<pcbddc->local_primal_size;i++) { 1269 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1270 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1271 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1272 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1273 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1274 } 1275 } else { 1276 for (i=0;i<pcbddc->local_primal_size;i++) { 1277 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1278 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1279 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1280 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1281 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1282 } 1283 } 1284 /* coarse basis functions */ 1285 for (i=0;i<pcbddc->local_primal_size;i++) { 1286 PetscScalar *y; 1287 1288 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1289 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1290 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1291 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1292 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1293 if (i<n_vertices) { 1294 y[n_B*i+idx_V_B[i]] = 1.0; 1295 } 1296 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1297 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1298 1299 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1300 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1301 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1302 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1303 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1304 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1305 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1306 } 1307 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1308 } 1309 } 1310 /* free memory */ 1311 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1312 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1313 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1314 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1315 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1316 ierr = PetscFree(work);CHKERRQ(ierr); 1317 if (n_vertices) { 1318 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1319 } 1320 if (n_constraints) { 1321 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1322 } 1323 /* Checking coarse_sub_mat and coarse basis functios */ 1324 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1325 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1326 if (pcbddc->dbg_flag) { 1327 Mat coarse_sub_mat; 1328 Mat AUXMAT,TM1,TM2,TM3,TM4; 1329 Mat coarse_phi_D,coarse_phi_B; 1330 Mat coarse_psi_D,coarse_psi_B; 1331 Mat A_II,A_BB,A_IB,A_BI; 1332 Mat C_B,CPHI; 1333 IS is_dummy; 1334 Vec mones; 1335 MatType checkmattype=MATSEQAIJ; 1336 PetscReal real_value; 1337 1338 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1339 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1340 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1341 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1342 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1343 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1344 if (unsymmetric_check) { 1345 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1346 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1347 } 1348 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1349 1350 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1351 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1352 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1353 if (unsymmetric_check) { 1354 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1355 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1356 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1357 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1358 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1359 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1360 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1361 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1362 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1363 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1364 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1365 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1366 } else { 1367 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1368 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1369 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1370 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1371 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1372 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1373 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1374 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1375 } 1376 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1377 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1378 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1379 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1380 if (n_benign) { 1381 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1382 PetscScalar *data,*data2; 1383 1384 ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr); 1385 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1386 ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1387 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1388 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1389 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1390 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1391 for (i=0;i<pcbddc->local_primal_size;i++) { 1392 data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i]; 1393 data[(i+1)*pcbddc->local_primal_size-1] += data2[i]; 1394 } 1395 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1396 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1397 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1398 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1399 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1400 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1401 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1402 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1403 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1404 } 1405 #if 0 1406 { 1407 PetscViewer viewer; 1408 char filename[256]; 1409 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1410 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1411 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1412 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1413 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1414 } 1415 #endif 1416 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1417 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1420 1421 /* check constraints */ 1422 if (!n_benign) { 1423 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1424 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1425 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1426 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1427 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1428 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1429 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1431 if (unsymmetric_check) { 1432 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1433 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1434 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1435 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1436 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1437 } 1438 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1439 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1440 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1441 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1442 } 1443 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1444 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1445 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1446 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1448 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1449 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1450 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1451 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1452 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1453 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1454 if (unsymmetric_check) { 1455 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1456 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1457 } 1458 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1459 } 1460 ierr = MatDestroy(&B0);CHKERRQ(ierr); 1461 /* get back data */ 1462 *coarse_submat_vals_n = coarse_submat_vals; 1463 PetscFunctionReturn(0); 1464 } 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1468 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1469 { 1470 Mat *work_mat; 1471 IS isrow_s,iscol_s; 1472 PetscBool rsorted,csorted; 1473 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1474 PetscErrorCode ierr; 1475 1476 PetscFunctionBegin; 1477 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1478 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1479 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1480 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1481 1482 if (!rsorted) { 1483 const PetscInt *idxs; 1484 PetscInt *idxs_sorted,i; 1485 1486 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1487 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1488 for (i=0;i<rsize;i++) { 1489 idxs_perm_r[i] = i; 1490 } 1491 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1492 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1493 for (i=0;i<rsize;i++) { 1494 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1495 } 1496 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1497 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1498 } else { 1499 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1500 isrow_s = isrow; 1501 } 1502 1503 if (!csorted) { 1504 if (isrow == iscol) { 1505 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1506 iscol_s = isrow_s; 1507 } else { 1508 const PetscInt *idxs; 1509 PetscInt *idxs_sorted,i; 1510 1511 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1512 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1513 for (i=0;i<csize;i++) { 1514 idxs_perm_c[i] = i; 1515 } 1516 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1517 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1518 for (i=0;i<csize;i++) { 1519 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1520 } 1521 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1522 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1523 } 1524 } else { 1525 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1526 iscol_s = iscol; 1527 } 1528 1529 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1530 1531 if (!rsorted || !csorted) { 1532 Mat new_mat; 1533 IS is_perm_r,is_perm_c; 1534 1535 if (!rsorted) { 1536 PetscInt *idxs_r,i; 1537 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1538 for (i=0;i<rsize;i++) { 1539 idxs_r[idxs_perm_r[i]] = i; 1540 } 1541 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1542 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1543 } else { 1544 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1545 } 1546 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1547 1548 if (!csorted) { 1549 if (isrow_s == iscol_s) { 1550 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1551 is_perm_c = is_perm_r; 1552 } else { 1553 PetscInt *idxs_c,i; 1554 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1555 for (i=0;i<csize;i++) { 1556 idxs_c[idxs_perm_c[i]] = i; 1557 } 1558 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1559 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1560 } 1561 } else { 1562 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1563 } 1564 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1565 1566 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1567 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1568 work_mat[0] = new_mat; 1569 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1570 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1571 } 1572 1573 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1574 *B = work_mat[0]; 1575 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1576 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1577 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1583 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1584 { 1585 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1586 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1587 Mat new_mat; 1588 IS is_local,is_global; 1589 PetscInt local_size; 1590 PetscBool isseqaij; 1591 PetscErrorCode ierr; 1592 1593 PetscFunctionBegin; 1594 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1595 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1596 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1597 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1598 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1599 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1600 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1601 1602 /* check */ 1603 if (pcbddc->dbg_flag) { 1604 Vec x,x_change; 1605 PetscReal error; 1606 1607 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1608 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1609 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1610 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1611 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1612 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1613 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1614 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1615 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1616 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1617 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1619 ierr = VecDestroy(&x);CHKERRQ(ierr); 1620 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1621 } 1622 1623 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1624 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1625 if (isseqaij) { 1626 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1627 } else { 1628 Mat work_mat; 1629 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1630 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1631 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1632 } 1633 if (matis->A->symmetric_set) { 1634 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1635 #if !defined(PETSC_USE_COMPLEX) 1636 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1637 #endif 1638 } 1639 /* 1640 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1641 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1642 */ 1643 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1649 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1650 { 1651 PC_IS* pcis = (PC_IS*)(pc->data); 1652 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1653 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1654 PetscInt *idx_R_local=NULL; 1655 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1656 PetscInt vbs,bs; 1657 PetscBT bitmask=NULL; 1658 PetscErrorCode ierr; 1659 1660 PetscFunctionBegin; 1661 /* 1662 No need to setup local scatters if 1663 - primal space is unchanged 1664 AND 1665 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1666 AND 1667 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1668 */ 1669 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1670 PetscFunctionReturn(0); 1671 } 1672 /* destroy old objects */ 1673 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1674 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1675 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1676 /* Set Non-overlapping dimensions */ 1677 n_B = pcis->n_B; 1678 n_D = pcis->n - n_B; 1679 n_vertices = pcbddc->n_vertices; 1680 1681 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1682 1683 /* create auxiliary bitmask and allocate workspace */ 1684 if (!sub_schurs->reuse_mumps) { 1685 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1686 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1687 for (i=0;i<n_vertices;i++) { 1688 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1689 } 1690 1691 for (i=0, n_R=0; i<pcis->n; i++) { 1692 if (!PetscBTLookup(bitmask,i)) { 1693 idx_R_local[n_R++] = i; 1694 } 1695 } 1696 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1697 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1698 1699 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1700 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1701 } 1702 1703 /* Block code */ 1704 vbs = 1; 1705 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1706 if (bs>1 && !(n_vertices%bs)) { 1707 PetscBool is_blocked = PETSC_TRUE; 1708 PetscInt *vary; 1709 if (!sub_schurs->reuse_mumps) { 1710 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1711 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1712 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1713 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1714 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1715 for (i=0; i<pcis->n/bs; i++) { 1716 if (vary[i]!=0 && vary[i]!=bs) { 1717 is_blocked = PETSC_FALSE; 1718 break; 1719 } 1720 } 1721 ierr = PetscFree(vary);CHKERRQ(ierr); 1722 } else { 1723 /* Verify directly the R set */ 1724 for (i=0; i<n_R/bs; i++) { 1725 PetscInt j,node=idx_R_local[bs*i]; 1726 for (j=1; j<bs; j++) { 1727 if (node != idx_R_local[bs*i+j]-j) { 1728 is_blocked = PETSC_FALSE; 1729 break; 1730 } 1731 } 1732 } 1733 } 1734 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1735 vbs = bs; 1736 for (i=0;i<n_R/vbs;i++) { 1737 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1738 } 1739 } 1740 } 1741 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1742 if (sub_schurs->reuse_mumps) { 1743 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1744 1745 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1746 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1747 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1748 reuse_mumps->is_R = pcbddc->is_R_local; 1749 } else { 1750 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1751 } 1752 1753 /* print some info if requested */ 1754 if (pcbddc->dbg_flag) { 1755 PetscInt benign = 0; 1756 1757 if (pcbddc->benign_p0_lidx >= 0) benign = 1; 1758 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1759 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1760 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1761 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 1762 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 1763 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr); 1764 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1765 } 1766 1767 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 1768 if (!sub_schurs->reuse_mumps) { 1769 IS is_aux1,is_aux2; 1770 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 1771 1772 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1773 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 1774 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 1775 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1776 for (i=0; i<n_D; i++) { 1777 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 1778 } 1779 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1780 for (i=0, j=0; i<n_R; i++) { 1781 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 1782 aux_array1[j++] = i; 1783 } 1784 } 1785 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1786 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1787 for (i=0, j=0; i<n_B; i++) { 1788 if (!PetscBTLookup(bitmask,is_indices[i])) { 1789 aux_array2[j++] = i; 1790 } 1791 } 1792 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1793 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 1794 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 1795 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1796 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 1797 1798 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1799 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 1800 for (i=0, j=0; i<n_R; i++) { 1801 if (PetscBTLookup(bitmask,idx_R_local[i])) { 1802 aux_array1[j++] = i; 1803 } 1804 } 1805 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 1806 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1807 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 1808 } 1809 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 1810 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1811 } else { 1812 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1813 IS tis; 1814 PetscInt schur_size; 1815 1816 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 1817 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 1818 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 1819 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1820 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1821 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 1822 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 1823 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1824 } 1825 } 1826 PetscFunctionReturn(0); 1827 } 1828 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 1832 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 1833 { 1834 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1835 PC_IS *pcis = (PC_IS*)pc->data; 1836 PC pc_temp; 1837 Mat A_RR; 1838 MatReuse reuse; 1839 PetscScalar m_one = -1.0; 1840 PetscReal value; 1841 PetscInt n_D,n_R; 1842 PetscBool use_exact,use_exact_reduced,issbaij; 1843 PetscErrorCode ierr; 1844 /* prefixes stuff */ 1845 char dir_prefix[256],neu_prefix[256],str_level[16]; 1846 size_t len; 1847 1848 PetscFunctionBegin; 1849 1850 /* compute prefixes */ 1851 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1852 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1853 if (!pcbddc->current_level) { 1854 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1855 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1856 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1857 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1858 } else { 1859 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1860 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 1861 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1862 len -= 15; /* remove "pc_bddc_coarse_" */ 1863 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 1864 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 1865 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1866 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 1867 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1868 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1869 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1870 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1871 } 1872 1873 /* DIRICHLET PROBLEM */ 1874 if (dirichlet) { 1875 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1876 if (pcbddc->local_mat->symmetric_set) { 1877 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1878 } 1879 /* Matrix for Dirichlet problem is pcis->A_II */ 1880 n_D = pcis->n - pcis->n_B; 1881 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1882 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1883 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1884 /* default */ 1885 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1886 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1887 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1888 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1889 if (issbaij) { 1890 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1891 } else { 1892 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1893 } 1894 /* Allow user's customization */ 1895 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1896 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1897 } 1898 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 1899 if (sub_schurs->reuse_mumps) { 1900 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1901 1902 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 1903 } 1904 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1905 if (!n_D) { 1906 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1907 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1908 } 1909 /* Set Up KSP for Dirichlet problem of BDDC */ 1910 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1911 /* set ksp_D into pcis data */ 1912 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1913 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1914 pcis->ksp_D = pcbddc->ksp_D; 1915 } 1916 1917 /* NEUMANN PROBLEM */ 1918 A_RR = 0; 1919 if (neumann) { 1920 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1921 PetscInt ibs,mbs; 1922 PetscBool issbaij; 1923 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1924 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1925 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1926 if (pcbddc->ksp_R) { /* already created ksp */ 1927 PetscInt nn_R; 1928 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 1929 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1930 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1931 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1932 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1933 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1934 reuse = MAT_INITIAL_MATRIX; 1935 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1936 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1937 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1938 reuse = MAT_INITIAL_MATRIX; 1939 } else { /* safe to reuse the matrix */ 1940 reuse = MAT_REUSE_MATRIX; 1941 } 1942 } 1943 /* last check */ 1944 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 1945 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1946 reuse = MAT_INITIAL_MATRIX; 1947 } 1948 } else { /* first time, so we need to create the matrix */ 1949 reuse = MAT_INITIAL_MATRIX; 1950 } 1951 /* extract A_RR */ 1952 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1953 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1954 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1955 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 1956 if (matis->A == pcbddc->local_mat) { 1957 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1958 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1959 } else { 1960 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1961 } 1962 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 1963 if (matis->A == pcbddc->local_mat) { 1964 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1965 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1966 } else { 1967 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 1968 } 1969 } 1970 if (!sub_schurs->reuse_mumps) { 1971 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1972 if (pcbddc->local_mat->symmetric_set) { 1973 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 1974 } 1975 } else { 1976 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1977 1978 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1979 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 1980 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1981 } 1982 if (!pcbddc->ksp_R) { /* create object if not present */ 1983 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1984 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1985 /* default */ 1986 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1987 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1988 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1989 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1990 if (issbaij) { 1991 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1992 } else { 1993 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1994 } 1995 /* Allow user's customization */ 1996 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1997 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1998 } 1999 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2000 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2001 if (!n_R) { 2002 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2003 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2004 } 2005 /* Reuse MUMPS solver if it is present */ 2006 if (sub_schurs->reuse_mumps) { 2007 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2008 2009 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2010 } 2011 /* Set Up KSP for Neumann problem of BDDC */ 2012 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2013 } 2014 /* free Neumann problem's matrix */ 2015 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2016 2017 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2018 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2019 if (pcbddc->dbg_flag) { 2020 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2021 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2023 } 2024 if (dirichlet) { /* Dirichlet */ 2025 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2026 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2027 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2028 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2029 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2030 /* need to be adapted? */ 2031 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2032 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2033 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2034 /* print info */ 2035 if (pcbddc->dbg_flag) { 2036 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2037 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2038 } 2039 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2040 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2041 } 2042 } 2043 if (neumann) { /* Neumann */ 2044 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2045 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2046 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2047 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2048 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2049 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2050 /* need to be adapted? */ 2051 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2052 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2053 /* print info */ 2054 if (pcbddc->dbg_flag) { 2055 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2056 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2057 } 2058 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2059 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2060 } 2061 } 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2068 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2069 { 2070 PetscErrorCode ierr; 2071 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2072 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2073 2074 PetscFunctionBegin; 2075 if (!sub_schurs->reuse_mumps) { 2076 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2077 } 2078 if (!pcbddc->switch_static) { 2079 if (applytranspose && pcbddc->local_auxmat1) { 2080 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2081 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2082 } 2083 if (!sub_schurs->reuse_mumps) { 2084 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2085 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2086 } else { 2087 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2088 2089 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2090 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2091 } 2092 } else { 2093 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2094 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2095 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2096 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2097 if (applytranspose && pcbddc->local_auxmat1) { 2098 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2099 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2100 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2101 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2102 } 2103 } 2104 if (!sub_schurs->reuse_mumps) { 2105 if (applytranspose) { 2106 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2107 } else { 2108 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2109 } 2110 #if defined(PETSC_HAVE_MUMPS) 2111 } else { 2112 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2113 2114 if (applytranspose) { 2115 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2116 } else { 2117 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2118 } 2119 #endif 2120 } 2121 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2122 if (!pcbddc->switch_static) { 2123 if (!sub_schurs->reuse_mumps) { 2124 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2125 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2126 } else { 2127 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2128 2129 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2130 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2131 } 2132 if (!applytranspose && pcbddc->local_auxmat1) { 2133 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2134 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2135 } 2136 } else { 2137 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2138 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2139 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2140 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2141 if (!applytranspose && pcbddc->local_auxmat1) { 2142 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2143 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2144 } 2145 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2146 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2147 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2148 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2156 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2157 { 2158 PetscErrorCode ierr; 2159 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2160 PC_IS* pcis = (PC_IS*) (pc->data); 2161 const PetscScalar zero = 0.0; 2162 2163 PetscFunctionBegin; 2164 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2165 if (applytranspose) { 2166 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2167 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2168 } else { 2169 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2170 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2171 } 2172 2173 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2174 if (pcbddc->benign_p0_lidx >= 0) { 2175 PetscScalar *array; 2176 2177 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2178 array[pcbddc->local_primal_size-1] += pcbddc->benign_p0; 2179 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2180 } 2181 2182 /* start communications from local primal nodes to rhs of coarse solver */ 2183 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2184 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2185 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2186 2187 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2188 /* TODO remove null space when doing multilevel */ 2189 if (pcbddc->coarse_ksp) { 2190 Vec rhs,sol; 2191 2192 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2193 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2194 if (applytranspose) { 2195 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2196 } else { 2197 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2198 } 2199 } 2200 2201 /* Local solution on R nodes */ 2202 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2203 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2204 } 2205 2206 /* communications from coarse sol to local primal nodes */ 2207 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2208 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2209 2210 /* Sum contributions from two levels */ 2211 if (applytranspose) { 2212 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2213 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2214 } else { 2215 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2216 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2217 } 2218 /* store p0 */ 2219 if (pcbddc->benign_p0_lidx >= 0) { 2220 PetscScalar *array; 2221 2222 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2223 pcbddc->benign_p0 = array[pcbddc->local_primal_size-1]; 2224 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 #undef __FUNCT__ 2230 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2231 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2232 { 2233 PetscErrorCode ierr; 2234 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2235 PetscScalar *array; 2236 Vec from,to; 2237 2238 PetscFunctionBegin; 2239 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2240 from = pcbddc->coarse_vec; 2241 to = pcbddc->vec1_P; 2242 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2243 Vec tvec; 2244 2245 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2246 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2247 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2248 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2249 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2250 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2251 } 2252 } else { /* from local to global -> put data in coarse right hand side */ 2253 from = pcbddc->vec1_P; 2254 to = pcbddc->coarse_vec; 2255 } 2256 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2262 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2263 { 2264 PetscErrorCode ierr; 2265 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2266 PetscScalar *array; 2267 Vec from,to; 2268 2269 PetscFunctionBegin; 2270 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2271 from = pcbddc->coarse_vec; 2272 to = pcbddc->vec1_P; 2273 } else { /* from local to global -> put data in coarse right hand side */ 2274 from = pcbddc->vec1_P; 2275 to = pcbddc->coarse_vec; 2276 } 2277 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2278 if (smode == SCATTER_FORWARD) { 2279 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2280 Vec tvec; 2281 2282 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2283 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2284 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2285 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2286 } 2287 } else { 2288 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2289 ierr = VecResetArray(from);CHKERRQ(ierr); 2290 } 2291 } 2292 PetscFunctionReturn(0); 2293 } 2294 2295 /* uncomment for testing purposes */ 2296 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2297 #undef __FUNCT__ 2298 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2299 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2300 { 2301 PetscErrorCode ierr; 2302 PC_IS* pcis = (PC_IS*)(pc->data); 2303 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2304 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2305 /* one and zero */ 2306 PetscScalar one=1.0,zero=0.0; 2307 /* space to store constraints and their local indices */ 2308 PetscScalar *constraints_data; 2309 PetscInt *constraints_idxs,*constraints_idxs_B; 2310 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2311 PetscInt *constraints_n; 2312 /* iterators */ 2313 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2314 /* BLAS integers */ 2315 PetscBLASInt lwork,lierr; 2316 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2317 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2318 /* reuse */ 2319 PetscInt olocal_primal_size,olocal_primal_size_cc; 2320 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2321 /* change of basis */ 2322 PetscBool qr_needed; 2323 PetscBT change_basis,qr_needed_idx; 2324 /* auxiliary stuff */ 2325 PetscInt *nnz,*is_indices; 2326 PetscInt ncc; 2327 /* some quantities */ 2328 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2329 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2330 2331 PetscFunctionBegin; 2332 /* Destroy Mat objects computed previously */ 2333 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2334 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2335 /* save info on constraints from previous setup (if any) */ 2336 olocal_primal_size = pcbddc->local_primal_size; 2337 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2338 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2339 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2340 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2341 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2342 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2343 2344 /* print some info */ 2345 if (pcbddc->dbg_flag) { 2346 IS vertices; 2347 PetscInt nv,nedges,nfaces; 2348 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2349 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2350 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2351 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2356 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2357 } 2358 2359 if (!pcbddc->adaptive_selection) { 2360 IS ISForVertices,*ISForFaces,*ISForEdges; 2361 MatNullSpace nearnullsp; 2362 const Vec *nearnullvecs; 2363 Vec *localnearnullsp; 2364 PetscScalar *array; 2365 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2366 PetscBool nnsp_has_cnst; 2367 /* LAPACK working arrays for SVD or POD */ 2368 PetscBool skip_lapack,boolforchange; 2369 PetscScalar *work; 2370 PetscReal *singular_vals; 2371 #if defined(PETSC_USE_COMPLEX) 2372 PetscReal *rwork; 2373 #endif 2374 #if defined(PETSC_MISSING_LAPACK_GESVD) 2375 PetscScalar *temp_basis,*correlation_mat; 2376 #else 2377 PetscBLASInt dummy_int=1; 2378 PetscScalar dummy_scalar=1.; 2379 #endif 2380 2381 /* Get index sets for faces, edges and vertices from graph */ 2382 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2383 /* free unneeded index sets */ 2384 if (!pcbddc->use_vertices) { 2385 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2386 } 2387 if (!pcbddc->use_edges) { 2388 for (i=0;i<n_ISForEdges;i++) { 2389 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2390 } 2391 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2392 n_ISForEdges = 0; 2393 } 2394 if (!pcbddc->use_faces) { 2395 for (i=0;i<n_ISForFaces;i++) { 2396 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2397 } 2398 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2399 n_ISForFaces = 0; 2400 } 2401 2402 #if defined(PETSC_USE_DEBUG) 2403 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2404 Also use_change_of_basis should be consistent among processors */ 2405 if (pcbddc->NullSpace) { 2406 PetscBool tbool[2],gbool[2]; 2407 2408 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2409 pcbddc->use_change_of_basis = PETSC_TRUE; 2410 if (!ISForEdges) { 2411 pcbddc->use_change_on_faces = PETSC_TRUE; 2412 } 2413 } 2414 tbool[0] = pcbddc->use_change_of_basis; 2415 tbool[1] = pcbddc->use_change_on_faces; 2416 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2417 pcbddc->use_change_of_basis = gbool[0]; 2418 pcbddc->use_change_on_faces = gbool[1]; 2419 } 2420 #endif 2421 2422 /* check if near null space is attached to global mat */ 2423 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2424 if (nearnullsp) { 2425 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2426 /* remove any stored info */ 2427 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2428 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2429 /* store information for BDDC solver reuse */ 2430 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2431 pcbddc->onearnullspace = nearnullsp; 2432 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2433 for (i=0;i<nnsp_size;i++) { 2434 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2435 } 2436 } else { /* if near null space is not provided BDDC uses constants by default */ 2437 nnsp_size = 0; 2438 nnsp_has_cnst = PETSC_TRUE; 2439 } 2440 /* get max number of constraints on a single cc */ 2441 max_constraints = nnsp_size; 2442 if (nnsp_has_cnst) max_constraints++; 2443 2444 /* 2445 Evaluate maximum storage size needed by the procedure 2446 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2447 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2448 There can be multiple constraints per connected component 2449 */ 2450 n_vertices = 0; 2451 if (ISForVertices) { 2452 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2453 } 2454 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2455 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2456 2457 total_counts = n_ISForFaces+n_ISForEdges; 2458 total_counts *= max_constraints; 2459 total_counts += n_vertices; 2460 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2461 2462 total_counts = 0; 2463 max_size_of_constraint = 0; 2464 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2465 IS used_is; 2466 if (i<n_ISForEdges) { 2467 used_is = ISForEdges[i]; 2468 } else { 2469 used_is = ISForFaces[i-n_ISForEdges]; 2470 } 2471 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2472 total_counts += j; 2473 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2474 } 2475 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2476 2477 /* get local part of global near null space vectors */ 2478 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2479 for (k=0;k<nnsp_size;k++) { 2480 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2481 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2482 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2483 } 2484 2485 /* whether or not to skip lapack calls */ 2486 skip_lapack = PETSC_TRUE; 2487 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2488 2489 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2490 if (!skip_lapack) { 2491 PetscScalar temp_work; 2492 2493 #if defined(PETSC_MISSING_LAPACK_GESVD) 2494 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2495 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2496 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2497 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2498 #if defined(PETSC_USE_COMPLEX) 2499 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2500 #endif 2501 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2502 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2503 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2504 lwork = -1; 2505 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2506 #if !defined(PETSC_USE_COMPLEX) 2507 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2508 #else 2509 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2510 #endif 2511 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2512 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2513 #else /* on missing GESVD */ 2514 /* SVD */ 2515 PetscInt max_n,min_n; 2516 max_n = max_size_of_constraint; 2517 min_n = max_constraints; 2518 if (max_size_of_constraint < max_constraints) { 2519 min_n = max_size_of_constraint; 2520 max_n = max_constraints; 2521 } 2522 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2523 #if defined(PETSC_USE_COMPLEX) 2524 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2525 #endif 2526 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2527 lwork = -1; 2528 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2529 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2530 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2531 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2532 #if !defined(PETSC_USE_COMPLEX) 2533 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2534 #else 2535 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2536 #endif 2537 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2538 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2539 #endif /* on missing GESVD */ 2540 /* Allocate optimal workspace */ 2541 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2542 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2543 } 2544 /* Now we can loop on constraining sets */ 2545 total_counts = 0; 2546 constraints_idxs_ptr[0] = 0; 2547 constraints_data_ptr[0] = 0; 2548 /* vertices */ 2549 if (n_vertices) { 2550 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2551 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2552 for (i=0;i<n_vertices;i++) { 2553 constraints_n[total_counts] = 1; 2554 constraints_data[total_counts] = 1.0; 2555 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2556 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2557 total_counts++; 2558 } 2559 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2560 n_vertices = total_counts; 2561 } 2562 2563 /* edges and faces */ 2564 total_counts_cc = total_counts; 2565 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2566 IS used_is; 2567 PetscBool idxs_copied = PETSC_FALSE; 2568 2569 if (ncc<n_ISForEdges) { 2570 used_is = ISForEdges[ncc]; 2571 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2572 } else { 2573 used_is = ISForFaces[ncc-n_ISForEdges]; 2574 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2575 } 2576 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2577 2578 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2579 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2580 /* change of basis should not be performed on local periodic nodes */ 2581 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2582 if (nnsp_has_cnst) { 2583 PetscScalar quad_value; 2584 2585 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2586 idxs_copied = PETSC_TRUE; 2587 2588 if (!pcbddc->use_nnsp_true) { 2589 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2590 } else { 2591 quad_value = 1.0; 2592 } 2593 for (j=0;j<size_of_constraint;j++) { 2594 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2595 } 2596 temp_constraints++; 2597 total_counts++; 2598 } 2599 for (k=0;k<nnsp_size;k++) { 2600 PetscReal real_value; 2601 PetscScalar *ptr_to_data; 2602 2603 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2604 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2605 for (j=0;j<size_of_constraint;j++) { 2606 ptr_to_data[j] = array[is_indices[j]]; 2607 } 2608 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2609 /* check if array is null on the connected component */ 2610 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2611 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2612 if (real_value > 0.0) { /* keep indices and values */ 2613 temp_constraints++; 2614 total_counts++; 2615 if (!idxs_copied) { 2616 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2617 idxs_copied = PETSC_TRUE; 2618 } 2619 } 2620 } 2621 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2622 valid_constraints = temp_constraints; 2623 if (!pcbddc->use_nnsp_true && temp_constraints) { 2624 if (temp_constraints == 1) { /* just normalize the constraint */ 2625 PetscScalar norm,*ptr_to_data; 2626 2627 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2628 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2629 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2630 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2631 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2632 } else { /* perform SVD */ 2633 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2634 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2635 2636 #if defined(PETSC_MISSING_LAPACK_GESVD) 2637 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2638 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2639 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2640 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2641 from that computed using LAPACKgesvd 2642 -> This is due to a different computation of eigenvectors in LAPACKheev 2643 -> The quality of the POD-computed basis will be the same */ 2644 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2645 /* Store upper triangular part of correlation matrix */ 2646 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2647 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2648 for (j=0;j<temp_constraints;j++) { 2649 for (k=0;k<j+1;k++) { 2650 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2651 } 2652 } 2653 /* compute eigenvalues and eigenvectors of correlation matrix */ 2654 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2655 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2656 #if !defined(PETSC_USE_COMPLEX) 2657 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2658 #else 2659 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2660 #endif 2661 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2662 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2663 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2664 j = 0; 2665 while (j < temp_constraints && singular_vals[j] < tol) j++; 2666 total_counts = total_counts-j; 2667 valid_constraints = temp_constraints-j; 2668 /* scale and copy POD basis into used quadrature memory */ 2669 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2670 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2671 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2672 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2673 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2674 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2675 if (j<temp_constraints) { 2676 PetscInt ii; 2677 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2678 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2679 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2680 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2681 for (k=0;k<temp_constraints-j;k++) { 2682 for (ii=0;ii<size_of_constraint;ii++) { 2683 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2684 } 2685 } 2686 } 2687 #else /* on missing GESVD */ 2688 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2689 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2690 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2691 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2692 #if !defined(PETSC_USE_COMPLEX) 2693 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2694 #else 2695 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2696 #endif 2697 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2698 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2699 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2700 k = temp_constraints; 2701 if (k > size_of_constraint) k = size_of_constraint; 2702 j = 0; 2703 while (j < k && singular_vals[k-j-1] < tol) j++; 2704 valid_constraints = k-j; 2705 total_counts = total_counts-temp_constraints+valid_constraints; 2706 #endif /* on missing GESVD */ 2707 } 2708 } 2709 /* update pointers information */ 2710 if (valid_constraints) { 2711 constraints_n[total_counts_cc] = valid_constraints; 2712 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2713 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2714 /* set change_of_basis flag */ 2715 if (boolforchange) { 2716 PetscBTSet(change_basis,total_counts_cc); 2717 } 2718 total_counts_cc++; 2719 } 2720 } 2721 /* free workspace */ 2722 if (!skip_lapack) { 2723 ierr = PetscFree(work);CHKERRQ(ierr); 2724 #if defined(PETSC_USE_COMPLEX) 2725 ierr = PetscFree(rwork);CHKERRQ(ierr); 2726 #endif 2727 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2728 #if defined(PETSC_MISSING_LAPACK_GESVD) 2729 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2730 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2731 #endif 2732 } 2733 for (k=0;k<nnsp_size;k++) { 2734 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2735 } 2736 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2737 /* free index sets of faces, edges and vertices */ 2738 for (i=0;i<n_ISForFaces;i++) { 2739 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2740 } 2741 if (n_ISForFaces) { 2742 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2743 } 2744 for (i=0;i<n_ISForEdges;i++) { 2745 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2746 } 2747 if (n_ISForEdges) { 2748 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2749 } 2750 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2751 } else { 2752 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2753 2754 total_counts = 0; 2755 n_vertices = 0; 2756 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 2757 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 2758 } 2759 max_constraints = 0; 2760 total_counts_cc = 0; 2761 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2762 total_counts += pcbddc->adaptive_constraints_n[i]; 2763 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 2764 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 2765 } 2766 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 2767 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 2768 constraints_idxs = pcbddc->adaptive_constraints_idxs; 2769 constraints_data = pcbddc->adaptive_constraints_data; 2770 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 2771 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 2772 total_counts_cc = 0; 2773 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 2774 if (pcbddc->adaptive_constraints_n[i]) { 2775 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 2776 } 2777 } 2778 #if 0 2779 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 2780 for (i=0;i<total_counts_cc;i++) { 2781 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 2782 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 2783 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 2784 printf(" %d",constraints_idxs[j]); 2785 } 2786 printf("\n"); 2787 printf("number of cc: %d\n",constraints_n[i]); 2788 } 2789 for (i=0;i<n_vertices;i++) { 2790 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 2791 } 2792 for (i=0;i<sub_schurs->n_subs;i++) { 2793 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 2794 } 2795 #endif 2796 2797 max_size_of_constraint = 0; 2798 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 2799 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 2800 /* Change of basis */ 2801 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 2802 if (pcbddc->use_change_of_basis) { 2803 for (i=0;i<sub_schurs->n_subs;i++) { 2804 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 2805 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 2806 } 2807 } 2808 } 2809 } 2810 pcbddc->local_primal_size = total_counts; 2811 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2812 ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2813 2814 /* map constraints_idxs in boundary numbering */ 2815 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 2816 if (i != constraints_idxs_ptr[total_counts_cc]) { 2817 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 2818 } 2819 2820 /* Create constraint matrix */ 2821 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2822 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 2823 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 2824 2825 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 2826 /* determine if a QR strategy is needed for change of basis */ 2827 qr_needed = PETSC_FALSE; 2828 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 2829 total_primal_vertices=0; 2830 pcbddc->local_primal_size_cc = 0; 2831 for (i=0;i<total_counts_cc;i++) { 2832 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2833 if (size_of_constraint == 1) { 2834 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 2835 pcbddc->local_primal_size_cc += 1; 2836 } else if (PetscBTLookup(change_basis,i)) { 2837 for (k=0;k<constraints_n[i];k++) { 2838 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2839 } 2840 pcbddc->local_primal_size_cc += constraints_n[i]; 2841 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 2842 PetscBTSet(qr_needed_idx,i); 2843 qr_needed = PETSC_TRUE; 2844 } 2845 } else { 2846 pcbddc->local_primal_size_cc += 1; 2847 } 2848 } 2849 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 2850 pcbddc->n_vertices = total_primal_vertices; 2851 /* permute indices in order to have a sorted set of vertices */ 2852 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2853 2854 /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */ 2855 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2856 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2857 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 2858 2859 /* nonzero structure of constraint matrix */ 2860 /* and get reference dof for local constraints */ 2861 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 2862 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 2863 2864 j = total_primal_vertices; 2865 total_counts = total_primal_vertices; 2866 cum = total_primal_vertices; 2867 for (i=n_vertices;i<total_counts_cc;i++) { 2868 if (!PetscBTLookup(change_basis,i)) { 2869 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 2870 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 2871 cum++; 2872 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2873 for (k=0;k<constraints_n[i];k++) { 2874 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 2875 nnz[j+k] = size_of_constraint; 2876 } 2877 j += constraints_n[i]; 2878 } 2879 } 2880 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 2881 ierr = PetscFree(nnz);CHKERRQ(ierr); 2882 2883 /* set values in constraint matrix */ 2884 for (i=0;i<total_primal_vertices;i++) { 2885 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 2886 } 2887 total_counts = total_primal_vertices; 2888 for (i=n_vertices;i<total_counts_cc;i++) { 2889 if (!PetscBTLookup(change_basis,i)) { 2890 PetscInt *cols; 2891 2892 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2893 cols = constraints_idxs+constraints_idxs_ptr[i]; 2894 for (k=0;k<constraints_n[i];k++) { 2895 PetscInt row = total_counts+k; 2896 PetscScalar *vals; 2897 2898 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 2899 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2900 } 2901 total_counts += constraints_n[i]; 2902 } 2903 } 2904 /* assembling */ 2905 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2907 2908 /* 2909 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2910 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 2911 */ 2912 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 2913 if (pcbddc->use_change_of_basis) { 2914 /* dual and primal dofs on a single cc */ 2915 PetscInt dual_dofs,primal_dofs; 2916 /* working stuff for GEQRF */ 2917 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 2918 PetscBLASInt lqr_work; 2919 /* working stuff for UNGQR */ 2920 PetscScalar *gqr_work,lgqr_work_t; 2921 PetscBLASInt lgqr_work; 2922 /* working stuff for TRTRS */ 2923 PetscScalar *trs_rhs; 2924 PetscBLASInt Blas_NRHS; 2925 /* pointers for values insertion into change of basis matrix */ 2926 PetscInt *start_rows,*start_cols; 2927 PetscScalar *start_vals; 2928 /* working stuff for values insertion */ 2929 PetscBT is_primal; 2930 PetscInt *aux_primal_numbering_B; 2931 /* matrix sizes */ 2932 PetscInt global_size,local_size; 2933 /* temporary change of basis */ 2934 Mat localChangeOfBasisMatrix; 2935 /* extra space for debugging */ 2936 PetscScalar *dbg_work; 2937 2938 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 2939 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 2940 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 2941 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 2942 /* nonzeros for local mat */ 2943 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 2944 for (i=0;i<pcis->n;i++) nnz[i]=1; 2945 for (i=n_vertices;i<total_counts_cc;i++) { 2946 if (PetscBTLookup(change_basis,i)) { 2947 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 2948 if (PetscBTLookup(qr_needed_idx,i)) { 2949 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 2950 } else { 2951 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 2952 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 2953 } 2954 } 2955 } 2956 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 2957 ierr = PetscFree(nnz);CHKERRQ(ierr); 2958 /* Set initial identity in the matrix */ 2959 for (i=0;i<pcis->n;i++) { 2960 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 2961 } 2962 2963 if (pcbddc->dbg_flag) { 2964 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2965 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 2966 } 2967 2968 2969 /* Now we loop on the constraints which need a change of basis */ 2970 /* 2971 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 2972 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 2973 2974 Basic blocks of change of basis matrix T computed by 2975 2976 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 2977 2978 | 1 0 ... 0 s_1/S | 2979 | 0 1 ... 0 s_2/S | 2980 | ... | 2981 | 0 ... 1 s_{n-1}/S | 2982 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 2983 2984 with S = \sum_{i=1}^n s_i^2 2985 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 2986 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 2987 2988 - QR decomposition of constraints otherwise 2989 */ 2990 if (qr_needed) { 2991 /* space to store Q */ 2992 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 2993 /* first we issue queries for optimal work */ 2994 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 2995 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2996 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2997 lqr_work = -1; 2998 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 2999 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3000 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3001 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3002 lgqr_work = -1; 3003 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3004 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3005 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3006 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3007 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3008 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3009 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3010 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3011 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3012 /* array to store scaling factors for reflectors */ 3013 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3014 /* array to store rhs and solution of triangular solver */ 3015 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3016 /* allocating workspace for check */ 3017 if (pcbddc->dbg_flag) { 3018 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3019 } 3020 } 3021 /* array to store whether a node is primal or not */ 3022 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3023 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3024 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3025 if (i != total_primal_vertices) { 3026 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3027 } 3028 for (i=0;i<total_primal_vertices;i++) { 3029 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3030 } 3031 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3032 3033 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3034 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3035 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3036 if (PetscBTLookup(change_basis,total_counts)) { 3037 /* get constraint info */ 3038 primal_dofs = constraints_n[total_counts]; 3039 dual_dofs = size_of_constraint-primal_dofs; 3040 3041 if (pcbddc->dbg_flag) { 3042 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3043 } 3044 3045 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3046 3047 /* copy quadrature constraints for change of basis check */ 3048 if (pcbddc->dbg_flag) { 3049 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3050 } 3051 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3052 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3053 3054 /* compute QR decomposition of constraints */ 3055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3056 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3058 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3059 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3060 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3061 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3062 3063 /* explictly compute R^-T */ 3064 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3065 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3066 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3067 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3068 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3069 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3070 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3071 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3072 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3073 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3074 3075 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3076 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3077 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3078 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3080 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3081 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3082 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3083 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3084 3085 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3086 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3087 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3088 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3089 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3090 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3092 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3094 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3095 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3096 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3097 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3098 3099 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3100 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3101 /* insert cols for primal dofs */ 3102 for (j=0;j<primal_dofs;j++) { 3103 start_vals = &qr_basis[j*size_of_constraint]; 3104 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3105 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3106 } 3107 /* insert cols for dual dofs */ 3108 for (j=0,k=0;j<dual_dofs;k++) { 3109 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3110 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3111 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3112 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3113 j++; 3114 } 3115 } 3116 3117 /* check change of basis */ 3118 if (pcbddc->dbg_flag) { 3119 PetscInt ii,jj; 3120 PetscBool valid_qr=PETSC_TRUE; 3121 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3123 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3124 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3126 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3127 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3128 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3129 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3130 for (jj=0;jj<size_of_constraint;jj++) { 3131 for (ii=0;ii<primal_dofs;ii++) { 3132 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3133 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3134 } 3135 } 3136 if (!valid_qr) { 3137 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3138 for (jj=0;jj<size_of_constraint;jj++) { 3139 for (ii=0;ii<primal_dofs;ii++) { 3140 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3141 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3142 } 3143 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3144 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3145 } 3146 } 3147 } 3148 } else { 3149 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3150 } 3151 } 3152 } else { /* simple transformation block */ 3153 PetscInt row,col; 3154 PetscScalar val,norm; 3155 3156 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3157 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3158 for (j=0;j<size_of_constraint;j++) { 3159 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3160 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3161 if (!PetscBTLookup(is_primal,row_B)) { 3162 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3163 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3164 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3165 } else { 3166 for (k=0;k<size_of_constraint;k++) { 3167 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3168 if (row != col) { 3169 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3170 } else { 3171 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3172 } 3173 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3174 } 3175 } 3176 } 3177 if (pcbddc->dbg_flag) { 3178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3179 } 3180 } 3181 } else { 3182 if (pcbddc->dbg_flag) { 3183 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3184 } 3185 } 3186 } 3187 3188 /* free workspace */ 3189 if (qr_needed) { 3190 if (pcbddc->dbg_flag) { 3191 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3192 } 3193 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3194 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3195 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3196 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3197 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3198 } 3199 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3200 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3201 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3202 3203 /* assembling of global change of variable */ 3204 { 3205 Mat tmat; 3206 PetscInt bs; 3207 3208 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3209 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3210 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3211 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3212 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3213 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3214 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3215 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3216 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3217 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3218 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3219 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3220 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3221 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3222 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3223 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3224 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3225 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3226 } 3227 /* check */ 3228 if (pcbddc->dbg_flag) { 3229 PetscReal error; 3230 Vec x,x_change; 3231 3232 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3233 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3234 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3235 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3236 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3237 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3238 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3239 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3240 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3241 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3242 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3243 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3244 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3245 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3246 ierr = VecDestroy(&x);CHKERRQ(ierr); 3247 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3248 } 3249 3250 /* adapt sub_schurs computed (if any) */ 3251 if (pcbddc->use_deluxe_scaling) { 3252 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3253 if (sub_schurs->S_Ej_all) { 3254 Mat S_new,tmat; 3255 ISLocalToGlobalMapping NtoSall; 3256 IS is_all_N,is_V,is_V_Sall; 3257 const PetscScalar *array; 3258 const PetscInt *idxs_V,*idxs_all; 3259 PetscInt i,n_V; 3260 3261 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3262 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3263 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3264 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3265 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3266 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3267 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3268 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3269 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3270 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3271 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3272 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3273 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3274 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3275 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3276 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3277 for (i=0;i<n_V;i++) { 3278 PetscScalar val; 3279 PetscInt idx; 3280 3281 idx = idxs_V[i]; 3282 val = array[idxs_all[idxs_V[i]]]; 3283 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3284 } 3285 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 sub_schurs->S_Ej_all = S_new; 3288 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3289 if (sub_schurs->sum_S_Ej_all) { 3290 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3291 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3292 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3293 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3294 sub_schurs->sum_S_Ej_all = S_new; 3295 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3296 } 3297 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3298 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3299 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3300 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3301 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3302 } 3303 } 3304 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3305 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3306 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3307 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3308 } 3309 3310 /* set up change of basis context */ 3311 if (pcbddc->ChangeOfBasisMatrix) { 3312 PCBDDCChange_ctx change_ctx; 3313 3314 if (!pcbddc->new_global_mat) { 3315 PetscInt global_size,local_size; 3316 3317 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3318 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3319 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3320 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3321 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3322 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3323 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3324 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3325 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3326 } else { 3327 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3328 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3329 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3330 } 3331 if (!pcbddc->user_ChangeOfBasisMatrix) { 3332 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3333 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3334 } else { 3335 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3336 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3337 } 3338 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3339 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3340 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3341 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3342 } 3343 3344 /* add pressure dof to set of primal nodes for numbering purposes */ 3345 if (pcbddc->benign_p0_lidx >= 0) { 3346 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx; 3347 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx; 3348 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3349 pcbddc->local_primal_size_cc++; 3350 pcbddc->local_primal_size++; 3351 } 3352 3353 /* check if a new primal space has been introduced (also take into account benign trick) */ 3354 pcbddc->new_primal_space_local = PETSC_TRUE; 3355 if (olocal_primal_size == pcbddc->local_primal_size) { 3356 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3357 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3358 if (!pcbddc->new_primal_space_local) { 3359 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3360 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3361 } 3362 } 3363 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3364 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3365 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3366 3367 /* flush dbg viewer */ 3368 if (pcbddc->dbg_flag) { 3369 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3370 } 3371 3372 /* free workspace */ 3373 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3374 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3375 if (!pcbddc->adaptive_selection) { 3376 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3377 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3378 } else { 3379 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3380 pcbddc->adaptive_constraints_idxs_ptr, 3381 pcbddc->adaptive_constraints_data_ptr, 3382 pcbddc->adaptive_constraints_idxs, 3383 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3384 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3385 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3386 } 3387 PetscFunctionReturn(0); 3388 } 3389 3390 #undef __FUNCT__ 3391 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3392 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3393 { 3394 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3395 PC_IS *pcis = (PC_IS*)pc->data; 3396 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3397 PetscInt ierr,i,vertex_size,N; 3398 PetscViewer viewer=pcbddc->dbg_viewer; 3399 3400 PetscFunctionBegin; 3401 /* Reset previously computed graph */ 3402 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3403 /* Init local Graph struct */ 3404 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3405 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3406 3407 /* Check validity of the csr graph passed in by the user */ 3408 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3409 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 3410 } 3411 3412 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3413 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3414 PetscInt *xadj,*adjncy; 3415 PetscInt nvtxs; 3416 PetscBool flg_row=PETSC_FALSE; 3417 3418 if (pcbddc->use_local_adj) { 3419 3420 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3421 if (flg_row) { 3422 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3423 pcbddc->computed_rowadj = PETSC_TRUE; 3424 } 3425 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3426 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3427 IS is_dummy; 3428 ISLocalToGlobalMapping l2gmap_dummy; 3429 PetscInt j,sum; 3430 PetscInt *cxadj,*cadjncy; 3431 const PetscInt *idxs; 3432 PCBDDCGraph graph; 3433 PetscBT is_on_boundary; 3434 3435 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3436 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3437 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3438 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3439 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3440 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3441 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3442 if (flg_row) { 3443 graph->xadj = xadj; 3444 graph->adjncy = adjncy; 3445 } 3446 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3447 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3448 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3449 3450 if (pcbddc->dbg_flag) { 3451 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3452 for (i=0;i<graph->ncc;i++) { 3453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3454 } 3455 } 3456 3457 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3458 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3459 for (i=0;i<pcis->n_B;i++) { 3460 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3461 } 3462 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3463 3464 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3465 sum = 0; 3466 for (i=0;i<graph->ncc;i++) { 3467 PetscInt sizecc = 0; 3468 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3469 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3470 sizecc++; 3471 } 3472 } 3473 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3474 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3475 cxadj[graph->queue[j]] = sizecc; 3476 } 3477 } 3478 sum += sizecc*sizecc; 3479 } 3480 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3481 sum = 0; 3482 for (i=0;i<pcis->n;i++) { 3483 PetscInt temp = cxadj[i]; 3484 cxadj[i] = sum; 3485 sum += temp; 3486 } 3487 cxadj[pcis->n] = sum; 3488 for (i=0;i<graph->ncc;i++) { 3489 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3490 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3491 PetscInt k,sizecc = 0; 3492 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3493 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3494 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3495 sizecc++; 3496 } 3497 } 3498 } 3499 } 3500 } 3501 if (sum) { 3502 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3503 } else { 3504 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3505 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3506 } 3507 graph->xadj = 0; 3508 graph->adjncy = 0; 3509 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3510 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3511 } 3512 } 3513 if (pcbddc->dbg_flag) { 3514 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3515 } 3516 3517 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3518 vertex_size = 1; 3519 if (pcbddc->user_provided_isfordofs) { 3520 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3521 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3522 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3523 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3524 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3525 } 3526 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3527 pcbddc->n_ISForDofs = 0; 3528 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3529 } 3530 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3531 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3532 } else { 3533 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3534 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3535 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3536 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3537 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3538 } 3539 } 3540 } 3541 3542 /* Setup of Graph */ 3543 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3544 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3545 } 3546 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3547 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3548 } 3549 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3550 3551 /* Graph's connected components analysis */ 3552 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3553 3554 /* print some info to stdout */ 3555 if (pcbddc->dbg_flag) { 3556 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3557 } 3558 3559 /* mark topography has done */ 3560 pcbddc->recompute_topography = PETSC_FALSE; 3561 PetscFunctionReturn(0); 3562 } 3563 3564 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3565 #undef __FUNCT__ 3566 #define __FUNCT__ "PCBDDCSubsetNumbering" 3567 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3568 { 3569 PetscSF sf; 3570 PetscLayout map; 3571 const PetscInt *idxs; 3572 PetscInt *leaf_data,*root_data,*gidxs; 3573 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3574 PetscInt n_n,nlocals,start,first_index; 3575 PetscMPIInt commsize; 3576 PetscBool first_found; 3577 PetscErrorCode ierr; 3578 3579 PetscFunctionBegin; 3580 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3581 if (subset_mult) { 3582 PetscCheckSameComm(subset,1,subset_mult,2); 3583 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3584 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3585 } 3586 /* create workspace layout for computing global indices of subset */ 3587 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3588 lbounds[0] = lbounds[1] = 0; 3589 for (i=0;i<n;i++) { 3590 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3591 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3592 } 3593 lbounds[0] = -lbounds[0]; 3594 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3595 gbounds[0] = -gbounds[0]; 3596 N = gbounds[1] - gbounds[0] + 1; 3597 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3598 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3599 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3600 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3601 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3602 3603 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3604 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3605 if (subset_mult) { 3606 const PetscInt* idxs_mult; 3607 3608 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3609 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3610 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3611 } else { 3612 for (i=0;i<n;i++) leaf_data[i] = 1; 3613 } 3614 /* local size of new subset */ 3615 n_n = 0; 3616 for (i=0;i<n;i++) n_n += leaf_data[i]; 3617 3618 /* global indexes in layout */ 3619 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3620 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3621 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3622 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3623 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3624 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3625 3626 /* reduce from leaves to roots */ 3627 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3628 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3629 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3630 3631 /* count indexes in local part of layout */ 3632 nlocals = 0; 3633 first_index = -1; 3634 first_found = PETSC_FALSE; 3635 for (i=0;i<Nl;i++) { 3636 if (!first_found && root_data[i]) { 3637 first_found = PETSC_TRUE; 3638 first_index = i; 3639 } 3640 nlocals += root_data[i]; 3641 } 3642 3643 /* cumulative of number of indexes and size of subset without holes */ 3644 #if defined(PETSC_HAVE_MPI_EXSCAN) 3645 start = 0; 3646 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3647 #else 3648 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3649 start = start-nlocals; 3650 #endif 3651 3652 if (N_n) { /* compute total size of new subset if requested */ 3653 *N_n = start + nlocals; 3654 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3655 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3656 } 3657 3658 /* adapt root data with cumulative */ 3659 if (first_found) { 3660 PetscInt old_index; 3661 3662 root_data[first_index] += start; 3663 old_index = first_index; 3664 for (i=first_index+1;i<Nl;i++) { 3665 if (root_data[i]) { 3666 root_data[i] += root_data[old_index]; 3667 old_index = i; 3668 } 3669 } 3670 } 3671 3672 /* from roots to leaves */ 3673 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3674 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3675 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3676 3677 /* create new IS with global indexes without holes */ 3678 if (subset_mult) { 3679 const PetscInt* idxs_mult; 3680 PetscInt cum; 3681 3682 cum = 0; 3683 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3684 for (i=0;i<n;i++) { 3685 PetscInt j; 3686 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3687 } 3688 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3689 } else { 3690 for (i=0;i<n;i++) { 3691 gidxs[i] = leaf_data[i]-1; 3692 } 3693 } 3694 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3695 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3696 PetscFunctionReturn(0); 3697 } 3698 3699 #undef __FUNCT__ 3700 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3701 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3702 { 3703 PetscInt i,j; 3704 PetscScalar *alphas; 3705 PetscErrorCode ierr; 3706 3707 PetscFunctionBegin; 3708 /* this implements stabilized Gram-Schmidt */ 3709 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3710 for (i=0;i<n;i++) { 3711 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3712 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3713 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3714 } 3715 ierr = PetscFree(alphas);CHKERRQ(ierr); 3716 PetscFunctionReturn(0); 3717 } 3718 3719 #undef __FUNCT__ 3720 #define __FUNCT__ "MatISGetSubassemblingPattern" 3721 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3722 { 3723 IS ranks_send_to; 3724 PetscInt n_neighs,*neighs,*n_shared,**shared; 3725 PetscMPIInt size,rank,color; 3726 PetscInt *xadj,*adjncy; 3727 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 3728 PetscInt i,local_size,threshold=0; 3729 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 3730 PetscSubcomm subcomm; 3731 PetscErrorCode ierr; 3732 3733 PetscFunctionBegin; 3734 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 3735 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 3736 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 3737 3738 /* Get info on mapping */ 3739 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 3740 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3741 3742 /* build local CSR graph of subdomains' connectivity */ 3743 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 3744 xadj[0] = 0; 3745 xadj[1] = PetscMax(n_neighs-1,0); 3746 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 3747 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 3748 3749 if (threshold) { 3750 PetscInt xadj_count = 0; 3751 for (i=1;i<n_neighs;i++) { 3752 if (n_shared[i] > threshold) { 3753 adjncy[xadj_count] = neighs[i]; 3754 adjncy_wgt[xadj_count] = n_shared[i]; 3755 xadj_count++; 3756 } 3757 } 3758 xadj[1] = xadj_count; 3759 } else { 3760 if (xadj[1]) { 3761 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 3762 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 3763 } 3764 } 3765 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 3766 if (use_square) { 3767 for (i=0;i<xadj[1];i++) { 3768 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 3769 } 3770 } 3771 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3772 3773 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 3774 3775 /* 3776 Restrict work on active processes only. 3777 */ 3778 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 3779 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 3780 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 3781 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 3782 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 3783 if (color) { 3784 ierr = PetscFree(xadj);CHKERRQ(ierr); 3785 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3786 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3787 } else { 3788 Mat subdomain_adj; 3789 IS new_ranks,new_ranks_contig; 3790 MatPartitioning partitioner; 3791 PetscInt prank,rstart=0,rend=0; 3792 PetscInt *is_indices,*oldranks; 3793 PetscBool aggregate; 3794 3795 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 3796 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 3797 prank = rank; 3798 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 3799 /* 3800 for (i=0;i<size;i++) { 3801 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 3802 } 3803 */ 3804 for (i=0;i<xadj[1];i++) { 3805 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 3806 } 3807 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 3808 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 3809 if (aggregate) { 3810 PetscInt lrows,row,ncols,*cols; 3811 PetscMPIInt nrank; 3812 PetscScalar *vals; 3813 3814 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 3815 lrows = 0; 3816 if (nrank<redprocs) { 3817 lrows = size/redprocs; 3818 if (nrank<size%redprocs) lrows++; 3819 } 3820 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 3821 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 3822 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3823 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 3824 row = nrank; 3825 ncols = xadj[1]-xadj[0]; 3826 cols = adjncy; 3827 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 3828 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 3829 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3830 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3831 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3832 ierr = PetscFree(xadj);CHKERRQ(ierr); 3833 ierr = PetscFree(adjncy);CHKERRQ(ierr); 3834 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 3835 ierr = PetscFree(vals);CHKERRQ(ierr); 3836 } else { 3837 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 3838 } 3839 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 3840 3841 /* Partition */ 3842 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 3843 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 3844 if (use_vwgt) { 3845 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 3846 v_wgt[0] = local_size; 3847 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 3848 } 3849 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 3850 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 3851 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 3852 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 3853 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 3854 3855 /* renumber new_ranks to avoid "holes" in new set of processors */ 3856 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 3857 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 3858 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3859 if (!redprocs) { 3860 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 3861 } else { 3862 PetscInt idxs[1]; 3863 PetscMPIInt tag; 3864 MPI_Request *reqs; 3865 3866 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 3867 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 3868 for (i=rstart;i<rend;i++) { 3869 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 3870 } 3871 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 3872 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3873 ierr = PetscFree(reqs);CHKERRQ(ierr); 3874 ranks_send_to_idx[0] = oldranks[idxs[0]]; 3875 } 3876 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3877 /* clean up */ 3878 ierr = PetscFree(oldranks);CHKERRQ(ierr); 3879 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 3880 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 3881 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 3882 } 3883 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 3884 3885 /* assemble parallel IS for sends */ 3886 i = 1; 3887 if (color) i=0; 3888 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 3889 /* get back IS */ 3890 *is_sends = ranks_send_to; 3891 PetscFunctionReturn(0); 3892 } 3893 3894 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatISSubassemble" 3898 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 3899 { 3900 Mat local_mat; 3901 IS is_sends_internal; 3902 PetscInt rows,cols,new_local_rows; 3903 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 3904 PetscBool ismatis,isdense,newisdense,destroy_mat; 3905 ISLocalToGlobalMapping l2gmap; 3906 PetscInt* l2gmap_indices; 3907 const PetscInt* is_indices; 3908 MatType new_local_type; 3909 /* buffers */ 3910 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 3911 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 3912 PetscInt *recv_buffer_idxs_local; 3913 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 3914 /* MPI */ 3915 MPI_Comm comm,comm_n; 3916 PetscSubcomm subcomm; 3917 PetscMPIInt n_sends,n_recvs,commsize; 3918 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 3919 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 3920 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 3921 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 3922 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 3923 PetscErrorCode ierr; 3924 3925 PetscFunctionBegin; 3926 /* TODO: add missing checks */ 3927 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 3928 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 3929 PetscValidLogicalCollectiveEnum(mat,reuse,5); 3930 PetscValidLogicalCollectiveInt(mat,nis,7); 3931 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 3932 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 3933 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3934 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 3935 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 3936 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 3937 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 3938 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 3939 PetscInt mrows,mcols,mnrows,mncols; 3940 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 3941 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 3942 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 3943 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 3944 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 3945 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 3946 } 3947 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 3948 PetscValidLogicalCollectiveInt(mat,bs,0); 3949 /* prepare IS for sending if not provided */ 3950 if (!is_sends) { 3951 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 3952 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 3953 } else { 3954 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 3955 is_sends_internal = is_sends; 3956 } 3957 3958 /* get comm */ 3959 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3960 3961 /* compute number of sends */ 3962 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 3963 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 3964 3965 /* compute number of receives */ 3966 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 3967 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 3968 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 3969 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 3970 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 3971 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 3972 ierr = PetscFree(iflags);CHKERRQ(ierr); 3973 3974 /* restrict comm if requested */ 3975 subcomm = 0; 3976 destroy_mat = PETSC_FALSE; 3977 if (restrict_comm) { 3978 PetscMPIInt color,subcommsize; 3979 3980 color = 0; 3981 if (restrict_full) { 3982 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 3983 } else { 3984 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 3985 } 3986 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 3987 subcommsize = commsize - subcommsize; 3988 /* check if reuse has been requested */ 3989 if (reuse == MAT_REUSE_MATRIX) { 3990 if (*mat_n) { 3991 PetscMPIInt subcommsize2; 3992 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 3993 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 3994 comm_n = PetscObjectComm((PetscObject)*mat_n); 3995 } else { 3996 comm_n = PETSC_COMM_SELF; 3997 } 3998 } else { /* MAT_INITIAL_MATRIX */ 3999 PetscMPIInt rank; 4000 4001 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4002 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4003 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4004 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4005 comm_n = PetscSubcommChild(subcomm); 4006 } 4007 /* flag to destroy *mat_n if not significative */ 4008 if (color) destroy_mat = PETSC_TRUE; 4009 } else { 4010 comm_n = comm; 4011 } 4012 4013 /* prepare send/receive buffers */ 4014 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4015 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4016 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4017 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4018 if (nis) { 4019 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4020 } 4021 4022 /* Get data from local matrices */ 4023 if (!isdense) { 4024 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4025 /* TODO: See below some guidelines on how to prepare the local buffers */ 4026 /* 4027 send_buffer_vals should contain the raw values of the local matrix 4028 send_buffer_idxs should contain: 4029 - MatType_PRIVATE type 4030 - PetscInt size_of_l2gmap 4031 - PetscInt global_row_indices[size_of_l2gmap] 4032 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4033 */ 4034 } else { 4035 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4036 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4037 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4038 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4039 send_buffer_idxs[1] = i; 4040 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4041 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4042 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4043 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4044 for (i=0;i<n_sends;i++) { 4045 ilengths_vals[is_indices[i]] = len*len; 4046 ilengths_idxs[is_indices[i]] = len+2; 4047 } 4048 } 4049 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4050 /* additional is (if any) */ 4051 if (nis) { 4052 PetscMPIInt psum; 4053 PetscInt j; 4054 for (j=0,psum=0;j<nis;j++) { 4055 PetscInt plen; 4056 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4057 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4058 psum += len+1; /* indices + lenght */ 4059 } 4060 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4061 for (j=0,psum=0;j<nis;j++) { 4062 PetscInt plen; 4063 const PetscInt *is_array_idxs; 4064 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4065 send_buffer_idxs_is[psum] = plen; 4066 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4067 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4068 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4069 psum += plen+1; /* indices + lenght */ 4070 } 4071 for (i=0;i<n_sends;i++) { 4072 ilengths_idxs_is[is_indices[i]] = psum; 4073 } 4074 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4075 } 4076 4077 buf_size_idxs = 0; 4078 buf_size_vals = 0; 4079 buf_size_idxs_is = 0; 4080 for (i=0;i<n_recvs;i++) { 4081 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4082 buf_size_vals += (PetscInt)olengths_vals[i]; 4083 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4084 } 4085 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4086 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4087 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4088 4089 /* get new tags for clean communications */ 4090 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4091 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4092 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4093 4094 /* allocate for requests */ 4095 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4096 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4097 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4098 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4099 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4100 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4101 4102 /* communications */ 4103 ptr_idxs = recv_buffer_idxs; 4104 ptr_vals = recv_buffer_vals; 4105 ptr_idxs_is = recv_buffer_idxs_is; 4106 for (i=0;i<n_recvs;i++) { 4107 source_dest = onodes[i]; 4108 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4109 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4110 ptr_idxs += olengths_idxs[i]; 4111 ptr_vals += olengths_vals[i]; 4112 if (nis) { 4113 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4114 ptr_idxs_is += olengths_idxs_is[i]; 4115 } 4116 } 4117 for (i=0;i<n_sends;i++) { 4118 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4119 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4120 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4121 if (nis) { 4122 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4123 } 4124 } 4125 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4126 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4127 4128 /* assemble new l2g map */ 4129 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4130 ptr_idxs = recv_buffer_idxs; 4131 new_local_rows = 0; 4132 for (i=0;i<n_recvs;i++) { 4133 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4134 ptr_idxs += olengths_idxs[i]; 4135 } 4136 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4137 ptr_idxs = recv_buffer_idxs; 4138 new_local_rows = 0; 4139 for (i=0;i<n_recvs;i++) { 4140 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4141 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4142 ptr_idxs += olengths_idxs[i]; 4143 } 4144 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4145 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4146 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4147 4148 /* infer new local matrix type from received local matrices type */ 4149 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4150 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4151 if (n_recvs) { 4152 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4153 ptr_idxs = recv_buffer_idxs; 4154 for (i=0;i<n_recvs;i++) { 4155 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4156 new_local_type_private = MATAIJ_PRIVATE; 4157 break; 4158 } 4159 ptr_idxs += olengths_idxs[i]; 4160 } 4161 switch (new_local_type_private) { 4162 case MATDENSE_PRIVATE: 4163 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4164 new_local_type = MATSEQAIJ; 4165 bs = 1; 4166 } else { /* if I receive only 1 dense matrix */ 4167 new_local_type = MATSEQDENSE; 4168 bs = 1; 4169 } 4170 break; 4171 case MATAIJ_PRIVATE: 4172 new_local_type = MATSEQAIJ; 4173 bs = 1; 4174 break; 4175 case MATBAIJ_PRIVATE: 4176 new_local_type = MATSEQBAIJ; 4177 break; 4178 case MATSBAIJ_PRIVATE: 4179 new_local_type = MATSEQSBAIJ; 4180 break; 4181 default: 4182 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4183 break; 4184 } 4185 } else { /* by default, new_local_type is seqdense */ 4186 new_local_type = MATSEQDENSE; 4187 bs = 1; 4188 } 4189 4190 /* create MATIS object if needed */ 4191 if (reuse == MAT_INITIAL_MATRIX) { 4192 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4193 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4194 } else { 4195 /* it also destroys the local matrices */ 4196 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4197 } 4198 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4199 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4200 4201 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4202 4203 /* Global to local map of received indices */ 4204 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4205 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4206 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4207 4208 /* restore attributes -> type of incoming data and its size */ 4209 buf_size_idxs = 0; 4210 for (i=0;i<n_recvs;i++) { 4211 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4212 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4213 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4214 } 4215 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4216 4217 /* set preallocation */ 4218 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4219 if (!newisdense) { 4220 PetscInt *new_local_nnz=0; 4221 4222 ptr_vals = recv_buffer_vals; 4223 ptr_idxs = recv_buffer_idxs_local; 4224 if (n_recvs) { 4225 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4226 } 4227 for (i=0;i<n_recvs;i++) { 4228 PetscInt j; 4229 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4230 for (j=0;j<*(ptr_idxs+1);j++) { 4231 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4232 } 4233 } else { 4234 /* TODO */ 4235 } 4236 ptr_idxs += olengths_idxs[i]; 4237 } 4238 if (new_local_nnz) { 4239 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4240 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4241 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4242 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4243 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4244 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4245 } else { 4246 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4247 } 4248 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4249 } else { 4250 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4251 } 4252 4253 /* set values */ 4254 ptr_vals = recv_buffer_vals; 4255 ptr_idxs = recv_buffer_idxs_local; 4256 for (i=0;i<n_recvs;i++) { 4257 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4258 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4259 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4260 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4261 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4262 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4263 } else { 4264 /* TODO */ 4265 } 4266 ptr_idxs += olengths_idxs[i]; 4267 ptr_vals += olengths_vals[i]; 4268 } 4269 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4270 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4271 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4272 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4273 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4274 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4275 4276 #if 0 4277 if (!restrict_comm) { /* check */ 4278 Vec lvec,rvec; 4279 PetscReal infty_error; 4280 4281 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4282 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4283 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4284 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4285 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4286 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4287 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4288 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4289 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4290 } 4291 #endif 4292 4293 /* assemble new additional is (if any) */ 4294 if (nis) { 4295 PetscInt **temp_idxs,*count_is,j,psum; 4296 4297 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4298 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4299 ptr_idxs = recv_buffer_idxs_is; 4300 psum = 0; 4301 for (i=0;i<n_recvs;i++) { 4302 for (j=0;j<nis;j++) { 4303 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4304 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4305 psum += plen; 4306 ptr_idxs += plen+1; /* shift pointer to received data */ 4307 } 4308 } 4309 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4310 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4311 for (i=1;i<nis;i++) { 4312 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4313 } 4314 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4315 ptr_idxs = recv_buffer_idxs_is; 4316 for (i=0;i<n_recvs;i++) { 4317 for (j=0;j<nis;j++) { 4318 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4319 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4320 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4321 ptr_idxs += plen+1; /* shift pointer to received data */ 4322 } 4323 } 4324 for (i=0;i<nis;i++) { 4325 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4326 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4327 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4328 } 4329 ierr = PetscFree(count_is);CHKERRQ(ierr); 4330 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4331 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4332 } 4333 /* free workspace */ 4334 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4335 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4336 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4337 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4338 if (isdense) { 4339 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4340 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4341 } else { 4342 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4343 } 4344 if (nis) { 4345 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4346 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4347 } 4348 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4349 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4350 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4351 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4352 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4353 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4354 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4355 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4356 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4357 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4358 ierr = PetscFree(onodes);CHKERRQ(ierr); 4359 if (nis) { 4360 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4361 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4362 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4363 } 4364 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4365 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4366 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4367 for (i=0;i<nis;i++) { 4368 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4369 } 4370 *mat_n = NULL; 4371 } 4372 PetscFunctionReturn(0); 4373 } 4374 4375 /* temporary hack into ksp private data structure */ 4376 #include <petsc/private/kspimpl.h> 4377 4378 #undef __FUNCT__ 4379 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4380 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4381 { 4382 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4383 PC_IS *pcis = (PC_IS*)pc->data; 4384 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4385 MatNullSpace CoarseNullSpace=NULL; 4386 ISLocalToGlobalMapping coarse_islg; 4387 IS coarse_is,*isarray; 4388 PetscInt i,im_active=-1,active_procs=-1; 4389 PetscInt nis,nisdofs,nisneu; 4390 PC pc_temp; 4391 PCType coarse_pc_type; 4392 KSPType coarse_ksp_type; 4393 PetscBool multilevel_requested,multilevel_allowed; 4394 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4395 Mat t_coarse_mat_is; 4396 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4397 PetscMPIInt all_procs; 4398 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4399 PetscBool compute_vecs = PETSC_FALSE; 4400 PetscScalar *array; 4401 PetscErrorCode ierr; 4402 4403 PetscFunctionBegin; 4404 /* Assign global numbering to coarse dofs */ 4405 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4406 PetscInt ocoarse_size; 4407 compute_vecs = PETSC_TRUE; 4408 ocoarse_size = pcbddc->coarse_size; 4409 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4410 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4411 /* see if we can avoid some work */ 4412 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4413 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4414 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4415 PC pc; 4416 PetscBool isbddc; 4417 4418 /* temporary workaround since PCBDDC does not have a reset method so far */ 4419 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4420 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4421 if (isbddc) { 4422 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4423 } 4424 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4425 coarse_reuse = PETSC_FALSE; 4426 } else { /* we can safely reuse already computed coarse matrix */ 4427 coarse_reuse = PETSC_TRUE; 4428 } 4429 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4430 coarse_reuse = PETSC_FALSE; 4431 } 4432 /* reset any subassembling information */ 4433 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4434 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4435 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4436 coarse_reuse = PETSC_TRUE; 4437 } 4438 4439 /* count "active" (i.e. with positive local size) and "void" processes */ 4440 im_active = !!(pcis->n); 4441 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4442 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4443 void_procs = all_procs-active_procs; 4444 csin_type_simple = PETSC_TRUE; 4445 redist = PETSC_FALSE; 4446 if (pcbddc->current_level && void_procs) { 4447 csin_ml = PETSC_TRUE; 4448 ncoarse_ml = void_procs; 4449 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4450 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4451 csin_ds = PETSC_TRUE; 4452 ncoarse_ds = pcbddc->redistribute_coarse; 4453 redist = PETSC_TRUE; 4454 } else { 4455 csin_ds = PETSC_TRUE; 4456 ncoarse_ds = active_procs; 4457 redist = PETSC_TRUE; 4458 } 4459 } else { 4460 csin_ml = PETSC_FALSE; 4461 ncoarse_ml = all_procs; 4462 if (void_procs) { 4463 csin_ds = PETSC_TRUE; 4464 ncoarse_ds = void_procs; 4465 csin_type_simple = PETSC_FALSE; 4466 } else { 4467 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4468 csin_ds = PETSC_TRUE; 4469 ncoarse_ds = pcbddc->redistribute_coarse; 4470 redist = PETSC_TRUE; 4471 } else { 4472 csin_ds = PETSC_FALSE; 4473 ncoarse_ds = all_procs; 4474 } 4475 } 4476 } 4477 4478 /* 4479 test if we can go multilevel: three conditions must be satisfied: 4480 - we have not exceeded the number of levels requested 4481 - we can actually subassemble the active processes 4482 - we can find a suitable number of MPI processes where we can place the subassembled problem 4483 */ 4484 multilevel_allowed = PETSC_FALSE; 4485 multilevel_requested = PETSC_FALSE; 4486 if (pcbddc->current_level < pcbddc->max_levels) { 4487 multilevel_requested = PETSC_TRUE; 4488 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4489 multilevel_allowed = PETSC_FALSE; 4490 } else { 4491 multilevel_allowed = PETSC_TRUE; 4492 } 4493 } 4494 /* determine number of process partecipating to coarse solver */ 4495 if (multilevel_allowed) { 4496 ncoarse = ncoarse_ml; 4497 csin = csin_ml; 4498 redist = PETSC_FALSE; 4499 } else { 4500 ncoarse = ncoarse_ds; 4501 csin = csin_ds; 4502 } 4503 4504 /* creates temporary l2gmap and IS for coarse indexes */ 4505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4506 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4507 4508 /* creates temporary MATIS object for coarse matrix */ 4509 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4510 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4511 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4512 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4513 #if 0 4514 { 4515 PetscViewer viewer; 4516 char filename[256]; 4517 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4518 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4519 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4520 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4521 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4522 } 4523 #endif 4524 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4525 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4526 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4527 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4528 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4529 4530 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4531 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4532 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4533 const PetscInt *idxs; 4534 ISLocalToGlobalMapping tmap; 4535 4536 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4537 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4538 /* allocate space for temporary storage */ 4539 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4540 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4541 /* allocate for IS array */ 4542 nisdofs = pcbddc->n_ISForDofsLocal; 4543 nisneu = !!pcbddc->NeumannBoundariesLocal; 4544 nis = nisdofs + nisneu; 4545 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4546 /* dofs splitting */ 4547 for (i=0;i<nisdofs;i++) { 4548 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4549 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4550 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4551 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4552 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4553 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4554 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4555 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4556 } 4557 /* neumann boundaries */ 4558 if (pcbddc->NeumannBoundariesLocal) { 4559 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4560 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4561 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4562 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4563 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4564 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4565 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4566 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4567 } 4568 /* free memory */ 4569 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4570 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4571 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4572 } else { 4573 nis = 0; 4574 nisdofs = 0; 4575 nisneu = 0; 4576 isarray = NULL; 4577 } 4578 /* destroy no longer needed map */ 4579 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4580 4581 /* restrict on coarse candidates (if needed) */ 4582 coarse_mat_is = NULL; 4583 if (csin) { 4584 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4585 if (redist) { 4586 PetscMPIInt rank; 4587 PetscInt spc,n_spc_p1,dest[1],destsize; 4588 4589 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4590 spc = active_procs/ncoarse; 4591 n_spc_p1 = active_procs%ncoarse; 4592 if (im_active) { 4593 destsize = 1; 4594 if (rank > n_spc_p1*(spc+1)-1) { 4595 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4596 } else { 4597 dest[0] = rank/(spc+1); 4598 } 4599 } else { 4600 destsize = 0; 4601 } 4602 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4603 } else if (csin_type_simple) { 4604 PetscMPIInt rank; 4605 PetscInt issize,isidx; 4606 4607 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4608 if (im_active) { 4609 issize = 1; 4610 isidx = (PetscInt)rank; 4611 } else { 4612 issize = 0; 4613 isidx = -1; 4614 } 4615 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4616 } else { /* get a suitable subassembling pattern from MATIS code */ 4617 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4618 } 4619 4620 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4621 if (!redist || ncoarse <= void_procs) { 4622 PetscInt ncoarse_cand,tissize,*nisindices; 4623 PetscInt *coarse_candidates; 4624 const PetscInt* tisindices; 4625 4626 /* get coarse candidates' ranks in pc communicator */ 4627 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4628 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4629 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4630 if (!coarse_candidates[i]) { 4631 coarse_candidates[ncoarse_cand++]=i; 4632 } 4633 } 4634 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4635 4636 4637 if (pcbddc->dbg_flag) { 4638 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4639 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4640 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4641 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4642 for (i=0;i<ncoarse_cand;i++) { 4643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4644 } 4645 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4646 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4647 } 4648 /* shift the pattern on coarse candidates */ 4649 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4650 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4651 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4652 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4653 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4654 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4655 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4656 } 4657 if (pcbddc->dbg_flag) { 4658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4659 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4660 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4661 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4662 } 4663 } 4664 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4665 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4666 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4667 } else { /* this is the last level, so use just receiving processes in subcomm */ 4668 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4669 } 4670 } else { 4671 if (pcbddc->dbg_flag) { 4672 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4673 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4674 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4675 } 4676 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4677 coarse_mat_is = t_coarse_mat_is; 4678 } 4679 4680 /* create local to global scatters for coarse problem */ 4681 if (compute_vecs) { 4682 PetscInt lrows; 4683 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4684 if (coarse_mat_is) { 4685 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4686 } else { 4687 lrows = 0; 4688 } 4689 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4690 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4691 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4692 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4693 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4694 } 4695 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4696 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4697 4698 /* set defaults for coarse KSP and PC */ 4699 if (multilevel_allowed) { 4700 coarse_ksp_type = KSPRICHARDSON; 4701 coarse_pc_type = PCBDDC; 4702 } else { 4703 coarse_ksp_type = KSPPREONLY; 4704 coarse_pc_type = PCREDUNDANT; 4705 } 4706 4707 /* print some info if requested */ 4708 if (pcbddc->dbg_flag) { 4709 if (!multilevel_allowed) { 4710 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4711 if (multilevel_requested) { 4712 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4713 } else if (pcbddc->max_levels) { 4714 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4715 } 4716 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4717 } 4718 } 4719 4720 /* create the coarse KSP object only once with defaults */ 4721 if (coarse_mat_is) { 4722 MatReuse coarse_mat_reuse; 4723 PetscViewer dbg_viewer = NULL; 4724 if (pcbddc->dbg_flag) { 4725 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 4726 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4727 } 4728 if (!pcbddc->coarse_ksp) { 4729 char prefix[256],str_level[16]; 4730 size_t len; 4731 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 4732 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4733 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 4734 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 4735 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 4736 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 4737 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 4738 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4739 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4740 /* prefix */ 4741 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 4742 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4743 if (!pcbddc->current_level) { 4744 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4745 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 4746 } else { 4747 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4748 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4749 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4750 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4751 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4752 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 4753 } 4754 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 4755 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4756 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 4757 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4758 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 4759 /* allow user customization */ 4760 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 4761 } 4762 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 4763 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 4764 if (nisdofs) { 4765 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 4766 for (i=0;i<nisdofs;i++) { 4767 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4768 } 4769 } 4770 if (nisneu) { 4771 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 4772 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 4773 } 4774 4775 /* get some info after set from options */ 4776 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 4777 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 4778 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 4779 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 4780 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 4781 isbddc = PETSC_FALSE; 4782 } 4783 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4784 if (isredundant) { 4785 KSP inner_ksp; 4786 PC inner_pc; 4787 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 4788 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 4789 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 4790 } 4791 4792 /* assemble coarse matrix */ 4793 if (coarse_reuse) { 4794 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 4795 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 4796 coarse_mat_reuse = MAT_REUSE_MATRIX; 4797 } else { 4798 coarse_mat_reuse = MAT_INITIAL_MATRIX; 4799 } 4800 if (isbddc || isnn) { 4801 if (pcbddc->coarsening_ratio > 1) { 4802 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 4803 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4804 if (pcbddc->dbg_flag) { 4805 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4806 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 4807 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 4808 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4809 } 4810 } 4811 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 4812 } else { 4813 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 4814 coarse_mat = coarse_mat_is; 4815 } 4816 } else { 4817 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 4818 } 4819 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 4820 4821 /* propagate symmetry info of coarse matrix */ 4822 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 4823 if (pc->pmat->symmetric_set) { 4824 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 4825 } 4826 if (pc->pmat->hermitian_set) { 4827 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 4828 } 4829 if (pc->pmat->spd_set) { 4830 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 4831 } 4832 /* set operators */ 4833 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4834 if (pcbddc->dbg_flag) { 4835 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 4836 } 4837 } else { /* processes non partecipating to coarse solver (if any) */ 4838 coarse_mat = 0; 4839 } 4840 ierr = PetscFree(isarray);CHKERRQ(ierr); 4841 #if 0 4842 { 4843 PetscViewer viewer; 4844 char filename[256]; 4845 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 4846 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 4847 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4848 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 4849 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4850 } 4851 #endif 4852 4853 /* Compute coarse null space (special handling by BDDC only) */ 4854 #if 0 4855 if (pcbddc->NullSpace) { 4856 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 4857 } 4858 #endif 4859 4860 if (pcbddc->coarse_ksp) { 4861 Vec crhs,csol; 4862 PetscBool ispreonly; 4863 4864 if (CoarseNullSpace) { 4865 if (isbddc) { 4866 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 4867 } else { 4868 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 4869 } 4870 } 4871 /* setup coarse ksp */ 4872 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 4873 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 4874 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 4875 /* hack */ 4876 if (!csol) { 4877 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 4878 } 4879 if (!crhs) { 4880 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 4881 } 4882 /* Check coarse problem if in debug mode or if solving with an iterative method */ 4883 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 4884 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 4885 KSP check_ksp; 4886 KSPType check_ksp_type; 4887 PC check_pc; 4888 Vec check_vec,coarse_vec; 4889 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 4890 PetscInt its; 4891 PetscBool compute_eigs; 4892 PetscReal *eigs_r,*eigs_c; 4893 PetscInt neigs; 4894 const char *prefix; 4895 4896 /* Create ksp object suitable for estimation of extreme eigenvalues */ 4897 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 4898 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 4899 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 4900 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 4901 if (ispreonly) { 4902 check_ksp_type = KSPPREONLY; 4903 compute_eigs = PETSC_FALSE; 4904 } else { 4905 check_ksp_type = KSPGMRES; 4906 compute_eigs = PETSC_TRUE; 4907 } 4908 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 4909 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 4910 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 4911 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 4912 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 4913 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 4914 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 4915 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 4916 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 4917 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 4918 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 4919 /* create random vec */ 4920 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 4921 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 4922 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 4923 if (CoarseNullSpace) { 4924 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 4925 } 4926 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4927 /* solve coarse problem */ 4928 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 4929 if (CoarseNullSpace) { 4930 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 4931 } 4932 /* set eigenvalue estimation if preonly has not been requested */ 4933 if (compute_eigs) { 4934 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 4935 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 4936 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 4937 lambda_max = eigs_r[neigs-1]; 4938 lambda_min = eigs_r[0]; 4939 if (pcbddc->use_coarse_estimates) { 4940 if (lambda_max>lambda_min) { 4941 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 4942 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 4943 } 4944 } 4945 } 4946 4947 /* check coarse problem residual error */ 4948 if (pcbddc->dbg_flag) { 4949 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 4950 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4951 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 4952 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4953 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 4954 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 4955 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 4956 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 4957 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 4958 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 4959 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 4960 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 4961 if (compute_eigs) { 4962 PetscReal lambda_max_s,lambda_min_s; 4963 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 4964 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 4965 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 4966 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 4967 for (i=0;i<neigs;i++) { 4968 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 4969 } 4970 } 4971 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 4972 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 4973 } 4974 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 4975 if (compute_eigs) { 4976 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 4977 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 4978 } 4979 } 4980 } 4981 /* print additional info */ 4982 if (pcbddc->dbg_flag) { 4983 /* waits until all processes reaches this point */ 4984 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 4985 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 4986 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4987 } 4988 4989 /* free memory */ 4990 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 4991 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 4992 PetscFunctionReturn(0); 4993 } 4994 4995 #undef __FUNCT__ 4996 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 4997 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 4998 { 4999 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5000 PC_IS* pcis = (PC_IS*)pc->data; 5001 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5002 IS subset,subset_mult,subset_n; 5003 PetscInt local_size,coarse_size=0; 5004 PetscInt *local_primal_indices=NULL; 5005 const PetscInt *t_local_primal_indices; 5006 PetscErrorCode ierr; 5007 5008 PetscFunctionBegin; 5009 /* Compute global number of coarse dofs */ 5010 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5011 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5012 } 5013 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5014 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5015 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5016 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5017 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5018 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5019 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5020 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5021 if (local_size != pcbddc->local_primal_size) { 5022 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5023 } 5024 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5025 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5026 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5027 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5028 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5029 5030 /* check numbering */ 5031 if (pcbddc->dbg_flag) { 5032 PetscScalar coarsesum,*array,*array2; 5033 PetscInt i; 5034 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5035 5036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5038 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5039 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5040 /* counter */ 5041 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5042 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5043 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5044 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5045 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5046 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5047 5048 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5049 for (i=0;i<pcbddc->local_primal_size;i++) { 5050 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5051 } 5052 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5053 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5054 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5055 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5056 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5057 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5058 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5059 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5060 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5061 for (i=0;i<pcis->n;i++) { 5062 if (array[i] != 0.0 && array[i] != array2[i]) { 5063 PetscInt owned = (PetscInt)(array[i]); 5064 PetscInt neigh = (PetscInt)(array2[i]); 5065 set_error = PETSC_TRUE; 5066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5067 } 5068 } 5069 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5070 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5071 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5072 for (i=0;i<pcis->n;i++) { 5073 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5074 } 5075 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5076 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5077 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5078 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5079 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5080 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5081 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5082 PetscInt *gidxs; 5083 5084 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5085 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5087 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5088 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5089 for (i=0;i<pcbddc->local_primal_size;i++) { 5090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5091 } 5092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5093 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5094 } 5095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5096 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5097 } 5098 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5099 /* get back data */ 5100 *coarse_size_n = coarse_size; 5101 *local_primal_indices_n = local_primal_indices; 5102 PetscFunctionReturn(0); 5103 } 5104 5105 #undef __FUNCT__ 5106 #define __FUNCT__ "PCBDDCGlobalToLocal" 5107 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5108 { 5109 IS localis_t; 5110 PetscInt i,lsize,*idxs,n; 5111 PetscScalar *vals; 5112 PetscErrorCode ierr; 5113 5114 PetscFunctionBegin; 5115 /* get indices in local ordering exploiting local to global map */ 5116 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5117 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5118 for (i=0;i<lsize;i++) vals[i] = 1.0; 5119 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5120 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5121 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5122 if (idxs) { /* multilevel guard */ 5123 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5124 } 5125 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5126 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5127 ierr = PetscFree(vals);CHKERRQ(ierr); 5128 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5129 /* now compute set in local ordering */ 5130 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5131 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5132 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5133 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5134 for (i=0,lsize=0;i<n;i++) { 5135 if (PetscRealPart(vals[i]) > 0.5) { 5136 lsize++; 5137 } 5138 } 5139 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5140 for (i=0,lsize=0;i<n;i++) { 5141 if (PetscRealPart(vals[i]) > 0.5) { 5142 idxs[lsize++] = i; 5143 } 5144 } 5145 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5146 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5147 *localis = localis_t; 5148 PetscFunctionReturn(0); 5149 } 5150 5151 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5152 #undef __FUNCT__ 5153 #define __FUNCT__ "PCBDDCMatMult_Private" 5154 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5155 { 5156 PCBDDCChange_ctx change_ctx; 5157 PetscErrorCode ierr; 5158 5159 PetscFunctionBegin; 5160 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5161 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5162 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5163 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 #undef __FUNCT__ 5168 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5169 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5170 { 5171 PCBDDCChange_ctx change_ctx; 5172 PetscErrorCode ierr; 5173 5174 PetscFunctionBegin; 5175 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5176 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5177 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5178 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5179 PetscFunctionReturn(0); 5180 } 5181 5182 #undef __FUNCT__ 5183 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5184 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5185 { 5186 PC_IS *pcis=(PC_IS*)pc->data; 5187 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5188 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5189 Mat S_j; 5190 PetscInt *used_xadj,*used_adjncy; 5191 PetscBool free_used_adj; 5192 PetscErrorCode ierr; 5193 5194 PetscFunctionBegin; 5195 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5196 free_used_adj = PETSC_FALSE; 5197 if (pcbddc->sub_schurs_layers == -1) { 5198 used_xadj = NULL; 5199 used_adjncy = NULL; 5200 } else { 5201 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5202 used_xadj = pcbddc->mat_graph->xadj; 5203 used_adjncy = pcbddc->mat_graph->adjncy; 5204 } else if (pcbddc->computed_rowadj) { 5205 used_xadj = pcbddc->mat_graph->xadj; 5206 used_adjncy = pcbddc->mat_graph->adjncy; 5207 } else { 5208 PetscBool flg_row=PETSC_FALSE; 5209 const PetscInt *xadj,*adjncy; 5210 PetscInt nvtxs; 5211 5212 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5213 if (flg_row) { 5214 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5215 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5216 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5217 free_used_adj = PETSC_TRUE; 5218 } else { 5219 pcbddc->sub_schurs_layers = -1; 5220 used_xadj = NULL; 5221 used_adjncy = NULL; 5222 } 5223 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5224 } 5225 } 5226 5227 /* setup sub_schurs data */ 5228 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5229 if (!sub_schurs->use_mumps) { 5230 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5231 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5232 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5233 } else { 5234 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5235 PetscBool isseqaij; 5236 if (!pcbddc->use_vertices && reuse_solvers) { 5237 PetscInt n_vertices; 5238 5239 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5240 reuse_solvers = (PetscBool)!n_vertices; 5241 } 5242 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5243 if (!isseqaij) { 5244 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5245 if (matis->A == pcbddc->local_mat) { 5246 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5247 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5248 } else { 5249 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5250 } 5251 } 5252 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5253 } 5254 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5255 5256 /* free adjacency */ 5257 if (free_used_adj) { 5258 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5259 } 5260 PetscFunctionReturn(0); 5261 } 5262 5263 #undef __FUNCT__ 5264 #define __FUNCT__ "PCBDDCInitSubSchurs" 5265 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5266 { 5267 PC_IS *pcis=(PC_IS*)pc->data; 5268 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5269 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5270 PCBDDCGraph graph; 5271 PetscErrorCode ierr; 5272 5273 PetscFunctionBegin; 5274 /* attach interface graph for determining subsets */ 5275 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5276 IS verticesIS,verticescomm; 5277 PetscInt vsize,*idxs; 5278 5279 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5280 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5281 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5282 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5283 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5284 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5285 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5286 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5287 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5288 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5289 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5290 /* 5291 if (pcbddc->dbg_flag) { 5292 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5293 } 5294 */ 5295 } else { 5296 graph = pcbddc->mat_graph; 5297 } 5298 5299 /* sub_schurs init */ 5300 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5301 5302 /* free graph struct */ 5303 if (pcbddc->sub_schurs_rebuild) { 5304 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5305 } 5306 PetscFunctionReturn(0); 5307 } 5308