1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 #include <petscblaslapack.h> 4 5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatDetectDisconnectedComponents" 10 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 11 { 12 Mat B; 13 IS is_dummy,*cc_n; 14 ISLocalToGlobalMapping l2gmap_dummy; 15 PCBDDCGraph graph; 16 PetscInt i,n; 17 PetscInt *xadj,*adjncy; 18 PetscInt *xadj_filtered,*adjncy_filtered; 19 PetscBool flg_row,isseqaij; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 24 if (!isseqaij && filter) { 25 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 26 } else { 27 B = A; 28 } 29 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 30 31 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 32 if (filter) { 33 PetscScalar *data; 34 PetscInt j,cum; 35 36 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 37 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 38 cum = 0; 39 for (i=0;i<n;i++) { 40 PetscInt t; 41 42 for (j=xadj[i];j<xadj[i+1];j++) { 43 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 44 continue; 45 } 46 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 47 } 48 t = xadj_filtered[i]; 49 xadj_filtered[i] = cum; 50 cum += t; 51 } 52 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 53 } else { 54 xadj_filtered = NULL; 55 adjncy_filtered = NULL; 56 } 57 58 /* compute local connected components using PCBDDCGraph */ 59 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 60 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 61 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 62 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 63 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 64 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 65 if (xadj_filtered) { 66 graph->xadj = xadj_filtered; 67 graph->adjncy = adjncy_filtered; 68 } else { 69 graph->xadj = xadj; 70 graph->adjncy = adjncy; 71 } 72 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 73 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 74 75 /* partial clean up */ 76 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 77 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 78 if (!isseqaij && filter) { 79 ierr = MatDestroy(&B);CHKERRQ(ierr); 80 } 81 82 /* get back data */ 83 *ncc = graph->ncc; 84 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 85 for (i=0;i<graph->ncc;i++) { 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 87 } 88 *cc = cc_n; 89 90 /* clean up graph */ 91 graph->xadj = 0; 92 graph->adjncy = 0; 93 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "PCBDDCBenignCheck" 99 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 100 { 101 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 102 PC_IS* pcis = (PC_IS*)(pc->data); 103 IS dirIS = NULL; 104 PetscInt i; 105 PetscErrorCode ierr; 106 107 PetscFunctionBegin; 108 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 109 if (zerodiag) { 110 Mat A; 111 Vec vec3_N; 112 PetscScalar *vals; 113 const PetscInt *idxs; 114 PetscInt nz; 115 116 /* p0 */ 117 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 118 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 119 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 120 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 121 for (i=0;i<nz;i++) vals[i] = 1.; 122 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 123 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 124 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 125 /* v_I */ 126 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 127 for (i=0;i<nz;i++) vals[i] = 0.; 128 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 130 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 131 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 132 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 133 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 134 if (dirIS) { 135 PetscInt n; 136 137 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 138 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 139 for (i=0;i<n;i++) vals[i] = 0.; 140 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 141 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 142 } 143 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 144 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 145 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 146 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 147 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 148 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 149 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 150 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 151 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 152 } 153 ierr = PetscFree(vals);CHKERRQ(ierr); 154 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 155 } 156 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 157 158 /* check PCBDDCBenignGetOrSetP0 */ 159 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 160 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 161 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 162 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 163 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 164 for (i=0;i<pcbddc->benign_n;i++) { 165 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 166 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 167 } 168 } 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 174 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 175 { 176 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 177 IS pressures,zerodiag,*zerodiag_subs; 178 PetscInt nz,n; 179 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 184 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 185 pcbddc->benign_original_mat = pcbddc->local_mat; 186 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 187 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 188 Checks if all the pressure dofs in each subdomain have a zero diagonal 189 If not, a change of basis on pressures is not needed 190 since the local Schur complements are SPD 191 */ 192 has_null_pressures = PETSC_TRUE; 193 have_null = PETSC_TRUE; 194 if (pcbddc->n_ISForDofsLocal) { 195 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 196 197 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 198 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 199 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 200 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 201 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 202 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 203 if (!sorted) { 204 ierr = ISSort(pressures);CHKERRQ(ierr); 205 } 206 } else { 207 pressures = NULL; 208 } 209 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 210 /* TODO: add check for shared dofs and raise error */ 211 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 212 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 213 if (!sorted) { 214 ierr = ISSort(zerodiag);CHKERRQ(ierr); 215 } 216 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 217 if (!nz) { 218 if (n) have_null = PETSC_FALSE; 219 has_null_pressures = PETSC_FALSE; 220 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 221 } 222 223 recompute_zerodiag = PETSC_FALSE; 224 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 225 zerodiag_subs = NULL; 226 pcbddc->benign_n = 0; 227 if (has_null_pressures) { 228 IS *subs; 229 PetscInt nsubs,i; 230 231 subs = pcbddc->local_subs; 232 nsubs = pcbddc->n_local_subs; 233 if (nsubs > 1) { 234 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 235 for (i=0;i<nsubs;i++) { 236 ISLocalToGlobalMapping l2g; 237 IS t_zerodiag_subs; 238 PetscInt nl; 239 240 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 241 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 242 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 243 if (nl) { 244 PetscBool valid = PETSC_TRUE; 245 246 if (pressures) { 247 IS t_pressure_subs; 248 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 249 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 250 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 251 } 252 if (valid) { 253 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 254 pcbddc->benign_n++; 255 } else { 256 recompute_zerodiag = PETSC_TRUE; 257 } 258 } 259 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 260 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 261 } 262 } else { /* there's just one subdomain (or zero if they have not been detected */ 263 PetscBool valid = PETSC_TRUE; 264 if (pressures) { 265 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 266 } 267 if (valid) { 268 pcbddc->benign_n = 1; 269 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 270 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 271 zerodiag_subs[0] = zerodiag; 272 } 273 } 274 } 275 276 if (!pcbddc->benign_n) { 277 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 278 recompute_zerodiag = PETSC_FALSE; 279 has_null_pressures = PETSC_FALSE; 280 have_null = PETSC_FALSE; 281 } 282 283 /* final check for null pressures */ 284 if (zerodiag && pressures) { 285 PetscInt nz,np; 286 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 287 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 288 if (nz != np) have_null = PETSC_FALSE; 289 } 290 291 if (recompute_zerodiag) { 292 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 293 if (pcbddc->benign_n == 1) { 294 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 295 zerodiag = zerodiag_subs[0]; 296 } else { 297 PetscInt i,nzn,*new_idxs; 298 299 nzn = 0; 300 for (i=0;i<pcbddc->benign_n;i++) { 301 PetscInt ns; 302 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 303 nzn += ns; 304 } 305 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 306 nzn = 0; 307 for (i=0;i<pcbddc->benign_n;i++) { 308 PetscInt ns,*idxs; 309 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 310 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 311 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 313 nzn += ns; 314 } 315 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 316 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 317 } 318 have_null = PETSC_FALSE; 319 } 320 321 if (has_null_pressures) { 322 IS zerodiagc; 323 const PetscInt *idxs,*idxsc; 324 PetscInt i,s,*nnz; 325 326 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 327 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 328 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 329 /* local change of basis for pressures */ 330 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 331 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 332 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 333 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 334 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 335 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 336 for (i=0;i<pcbddc->benign_n;i++) { 337 PetscInt nzs,j; 338 339 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 340 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 341 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 342 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 343 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 344 } 345 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 346 ierr = PetscFree(nnz);CHKERRQ(ierr); 347 /* set identity on velocities */ 348 for (i=0;i<n-nz;i++) { 349 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 350 } 351 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 352 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 353 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 354 /* set change on pressures */ 355 for (s=0;s<pcbddc->benign_n;s++) { 356 PetscScalar *array; 357 PetscInt nzs; 358 359 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 360 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 361 for (i=0;i<nzs-1;i++) { 362 PetscScalar vals[2]; 363 PetscInt cols[2]; 364 365 cols[0] = idxs[i]; 366 cols[1] = idxs[nzs-1]; 367 vals[0] = 1.; 368 vals[1] = 1.; 369 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 370 } 371 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 372 for (i=0;i<nzs-1;i++) array[i] = -1.; 373 array[nzs-1] = 1.; 374 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 375 /* store local idxs for p0 */ 376 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 377 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 378 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 379 ierr = PetscFree(array);CHKERRQ(ierr); 380 } 381 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 382 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 383 /* TODO: need optimization? */ 384 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 385 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 386 /* store global idxs for p0 */ 387 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 388 } 389 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 390 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 391 392 /* determines if the coarse solver will be singular or not */ 393 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 394 *zerodiaglocal = zerodiag; 395 PetscFunctionReturn(0); 396 } 397 398 #undef __FUNCT__ 399 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 400 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 401 { 402 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 403 PetscErrorCode ierr; 404 405 PetscFunctionBegin; 406 if (!pcbddc->benign_sf) { 407 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 408 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 409 } 410 if (get) { /* use SF to get values */ 411 PetscScalar *array; 412 413 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 414 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 415 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 416 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 417 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 418 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 420 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 421 } 422 PetscFunctionReturn(0); 423 } 424 425 #undef __FUNCT__ 426 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 427 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 428 { 429 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 430 PetscErrorCode ierr; 431 432 PetscFunctionBegin; 433 /* TODO: add error checking 434 - avoid nested pop (or push) calls. 435 - cannot push before pop. 436 - cannot call this if pcbddc->local_mat is NULL 437 */ 438 if (!pcbddc->benign_n) { 439 PetscFunctionReturn(0); 440 } 441 if (pop) { 442 IS is_p0; 443 MatReuse reuse; 444 445 /* extract B_0 */ 446 reuse = MAT_INITIAL_MATRIX; 447 if (pcbddc->benign_B0) { 448 reuse = MAT_REUSE_MATRIX; 449 } 450 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 451 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 452 /* remove rows and cols from local problem */ 453 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 454 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 455 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 456 } else { /* push */ 457 PetscInt i; 458 459 for (i=0;i<pcbddc->benign_n;i++) { 460 PetscScalar *B0_vals; 461 PetscInt *B0_cols,B0_ncol; 462 463 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 464 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 465 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 466 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 467 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 468 } 469 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 470 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 471 } 472 PetscFunctionReturn(0); 473 } 474 475 #undef __FUNCT__ 476 #define __FUNCT__ "PCBDDCAdaptiveSelection" 477 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 478 { 479 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 480 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 481 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 482 PetscBLASInt *B_iwork,*B_ifail; 483 PetscScalar *work,lwork; 484 PetscScalar *St,*S,*eigv; 485 PetscScalar *Sarray,*Starray; 486 PetscReal *eigs,thresh; 487 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 488 PetscBool allocated_S_St; 489 #if defined(PETSC_USE_COMPLEX) 490 PetscReal *rwork; 491 #endif 492 PetscErrorCode ierr; 493 494 PetscFunctionBegin; 495 if (!sub_schurs->use_mumps) { 496 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 497 } 498 499 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 500 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 501 } 502 503 if (pcbddc->dbg_flag) { 504 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 505 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 506 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 507 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 508 } 509 510 if (pcbddc->dbg_flag) { 511 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 512 } 513 514 /* max size of subsets */ 515 mss = 0; 516 for (i=0;i<sub_schurs->n_subs;i++) { 517 PetscInt subset_size; 518 519 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 520 mss = PetscMax(mss,subset_size); 521 } 522 523 /* min/max and threshold */ 524 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 525 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 526 nmax = PetscMax(nmin,nmax); 527 allocated_S_St = PETSC_FALSE; 528 if (nmin) { 529 allocated_S_St = PETSC_TRUE; 530 } 531 532 /* allocate lapack workspace */ 533 cum = cum2 = 0; 534 maxneigs = 0; 535 for (i=0;i<sub_schurs->n_subs;i++) { 536 PetscInt n,subset_size; 537 538 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 539 n = PetscMin(subset_size,nmax); 540 cum += subset_size; 541 cum2 += subset_size*n; 542 maxneigs = PetscMax(maxneigs,n); 543 } 544 if (mss) { 545 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 546 PetscBLASInt B_itype = 1; 547 PetscBLASInt B_N = mss; 548 PetscReal zero = 0.0; 549 PetscReal eps = 0.0; /* dlamch? */ 550 551 B_lwork = -1; 552 S = NULL; 553 St = NULL; 554 eigs = NULL; 555 eigv = NULL; 556 B_iwork = NULL; 557 B_ifail = NULL; 558 #if defined(PETSC_USE_COMPLEX) 559 rwork = NULL; 560 #endif 561 thresh = 1.0; 562 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 563 #if defined(PETSC_USE_COMPLEX) 564 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 565 #else 566 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 567 #endif 568 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 569 ierr = PetscFPTrapPop();CHKERRQ(ierr); 570 } else { 571 /* TODO */ 572 } 573 } else { 574 lwork = 0; 575 } 576 577 nv = 0; 578 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 579 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 580 } 581 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 582 if (allocated_S_St) { 583 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 584 } 585 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 586 #if defined(PETSC_USE_COMPLEX) 587 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 588 #endif 589 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 590 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 591 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 592 nv+cum,&pcbddc->adaptive_constraints_idxs, 593 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 594 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 595 596 maxneigs = 0; 597 cum = cum2 = cumarray = 0; 598 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 599 pcbddc->adaptive_constraints_data_ptr[0] = 0; 600 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 601 const PetscInt *idxs; 602 603 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 604 for (cum=0;cum<nv;cum++) { 605 pcbddc->adaptive_constraints_n[cum] = 1; 606 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 607 pcbddc->adaptive_constraints_data[cum] = 1.0; 608 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 609 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 610 } 611 cum2 = cum; 612 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 613 } 614 615 if (mss) { /* multilevel */ 616 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 617 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 618 } 619 620 for (i=0;i<sub_schurs->n_subs;i++) { 621 622 const PetscInt *idxs; 623 PetscReal infty = PETSC_MAX_REAL; 624 PetscInt j,subset_size,eigs_start = 0; 625 PetscBLASInt B_N; 626 PetscBool same_data = PETSC_FALSE; 627 628 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 629 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 630 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 631 if (sub_schurs->is_hermitian) { 632 PetscInt j,k; 633 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 634 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 635 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 636 } 637 for (j=0;j<subset_size;j++) { 638 for (k=j;k<subset_size;k++) { 639 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 640 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 641 } 642 } 643 } else { 644 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 645 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 646 } 647 } else { 648 S = Sarray + cumarray; 649 St = Starray + cumarray; 650 } 651 652 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 653 /* see if we can save some work */ 654 if (sub_schurs->n_subs == 1) { 655 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 656 } 657 658 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 659 B_neigs = 0; 660 } else { 661 /* Threshold: this is an heuristic for edges */ 662 thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold; 663 664 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 665 PetscBLASInt B_itype = 1; 666 PetscBLASInt B_IL, B_IU; 667 PetscReal eps = -1.0; /* dlamch? */ 668 PetscInt nmin_s; 669 670 if (pcbddc->dbg_flag) { 671 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 672 } 673 674 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 675 if (thresh > 1.+PETSC_SMALL) { 676 677 /* ask for eigenvalues larger than thresh */ 678 #if defined(PETSC_USE_COMPLEX) 679 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 680 #else 681 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 682 #endif 683 } else { 684 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 685 B_IL = 1; 686 #if defined(PETSC_USE_COMPLEX) 687 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 688 #else 689 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 690 #endif 691 } 692 ierr = PetscFPTrapPop();CHKERRQ(ierr); 693 if (B_ierr) { 694 if (B_ierr < 0 ) { 695 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 696 } else if (B_ierr <= B_N) { 697 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 698 } else { 699 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 700 } 701 } 702 703 if (B_neigs > nmax) { 704 if (pcbddc->dbg_flag) { 705 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 706 } 707 eigs_start = B_neigs -nmax; 708 B_neigs = nmax; 709 } 710 711 nmin_s = PetscMin(nmin,B_N); 712 if (B_neigs < nmin_s) { 713 PetscBLASInt B_neigs2; 714 715 B_IU = B_N - B_neigs; 716 B_IL = B_N - nmin_s + 1; 717 if (pcbddc->dbg_flag) { 718 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 719 } 720 if (sub_schurs->is_hermitian) { 721 PetscInt j; 722 for (j=0;j<subset_size;j++) { 723 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 724 } 725 for (j=0;j<subset_size;j++) { 726 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 727 } 728 } else { 729 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 730 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 731 } 732 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 733 #if defined(PETSC_USE_COMPLEX) 734 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 735 #else 736 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 737 #endif 738 ierr = PetscFPTrapPop();CHKERRQ(ierr); 739 B_neigs += B_neigs2; 740 } 741 if (B_ierr) { 742 if (B_ierr < 0 ) { 743 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 744 } else if (B_ierr <= B_N) { 745 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 746 } else { 747 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 748 } 749 } 750 if (pcbddc->dbg_flag) { 751 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 752 for (j=0;j<B_neigs;j++) { 753 if (eigs[j] == 0.0) { 754 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 755 } else { 756 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 757 } 758 } 759 } 760 } else { 761 /* TODO */ 762 } 763 } 764 maxneigs = PetscMax(B_neigs,maxneigs); 765 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 766 if (B_neigs) { 767 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 768 769 if (pcbddc->dbg_flag > 1) { 770 PetscInt ii; 771 for (ii=0;ii<B_neigs;ii++) { 772 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 773 for (j=0;j<B_N;j++) { 774 #if defined(PETSC_USE_COMPLEX) 775 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 776 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 777 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 778 #else 779 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 780 #endif 781 } 782 } 783 } 784 #if 0 785 for (j=0;j<B_neigs;j++) { 786 PetscBLASInt Blas_N,Blas_one = 1.0; 787 PetscScalar norm; 788 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 789 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 790 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 791 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 792 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 793 } else { 794 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 795 } 796 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 797 } 798 #endif 799 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 800 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 801 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 802 cum++; 803 } 804 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 805 /* shift for next computation */ 806 cumarray += subset_size*subset_size; 807 } 808 if (pcbddc->dbg_flag) { 809 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 810 } 811 812 if (mss) { 813 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 814 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 815 /* destroy matrices (junk) */ 816 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 817 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 818 } 819 if (allocated_S_St) { 820 ierr = PetscFree2(S,St);CHKERRQ(ierr); 821 } 822 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 823 #if defined(PETSC_USE_COMPLEX) 824 ierr = PetscFree(rwork);CHKERRQ(ierr); 825 #endif 826 if (pcbddc->dbg_flag) { 827 PetscInt maxneigs_r; 828 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 829 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 830 } 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNCT__ 835 #define __FUNCT__ "PCBDDCSetUpSolvers" 836 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 837 { 838 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 839 PetscScalar *coarse_submat_vals; 840 PetscErrorCode ierr; 841 842 PetscFunctionBegin; 843 /* Setup local scatters R_to_B and (optionally) R_to_D */ 844 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 845 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 846 847 /* Setup local neumann solver ksp_R */ 848 /* PCBDDCSetUpLocalScatters should be called first! */ 849 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 850 851 /* Change global null space passed in by the user if change of basis has been requested */ 852 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 853 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 854 } 855 856 /* 857 Setup local correction and local part of coarse basis. 858 Gives back the dense local part of the coarse matrix in column major ordering 859 */ 860 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 861 862 /* Compute total number of coarse nodes and setup coarse solver */ 863 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 864 865 /* free */ 866 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "PCBDDCResetCustomization" 872 PetscErrorCode PCBDDCResetCustomization(PC pc) 873 { 874 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 875 PetscErrorCode ierr; 876 877 PetscFunctionBegin; 878 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 879 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 880 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 881 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 882 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 883 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 884 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 885 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 886 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 887 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 888 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "PCBDDCResetTopography" 894 PetscErrorCode PCBDDCResetTopography(PC pc) 895 { 896 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 897 PetscInt i; 898 PetscErrorCode ierr; 899 900 PetscFunctionBegin; 901 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 902 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 903 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 904 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 905 for (i=0;i<pcbddc->n_local_subs;i++) { 906 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 907 } 908 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 909 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "PCBDDCResetSolvers" 915 PetscErrorCode PCBDDCResetSolvers(PC pc) 916 { 917 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 918 PetscScalar *array; 919 PetscErrorCode ierr; 920 921 PetscFunctionBegin; 922 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 923 if (pcbddc->coarse_phi_B) { 924 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 925 ierr = PetscFree(array);CHKERRQ(ierr); 926 } 927 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 928 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 929 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 930 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 931 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 932 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 933 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 934 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 935 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 936 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 937 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 938 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 939 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 940 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 941 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 942 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 943 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 944 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 945 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 946 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 947 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 948 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 949 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 950 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 951 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 952 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 953 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 954 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 955 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 956 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 962 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 963 { 964 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 965 PC_IS *pcis = (PC_IS*)pc->data; 966 VecType impVecType; 967 PetscInt n_constraints,n_R,old_size; 968 PetscErrorCode ierr; 969 970 PetscFunctionBegin; 971 if (!pcbddc->ConstraintMatrix) { 972 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 973 } 974 /* get sizes */ 975 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 976 n_R = pcis->n - pcbddc->n_vertices; 977 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 978 /* local work vectors (try to avoid unneeded work)*/ 979 /* R nodes */ 980 old_size = -1; 981 if (pcbddc->vec1_R) { 982 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 983 } 984 if (n_R != old_size) { 985 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 986 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 987 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 988 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 989 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 990 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 991 } 992 /* local primal dofs */ 993 old_size = -1; 994 if (pcbddc->vec1_P) { 995 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 996 } 997 if (pcbddc->local_primal_size != old_size) { 998 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 999 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1000 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1001 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1002 } 1003 /* local explicit constraints */ 1004 old_size = -1; 1005 if (pcbddc->vec1_C) { 1006 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1007 } 1008 if (n_constraints && n_constraints != old_size) { 1009 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1010 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1011 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1012 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "PCBDDCSetUpCorrection" 1019 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1020 { 1021 PetscErrorCode ierr; 1022 /* pointers to pcis and pcbddc */ 1023 PC_IS* pcis = (PC_IS*)pc->data; 1024 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1025 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1026 /* submatrices of local problem */ 1027 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1028 /* submatrices of benign trick */ 1029 Mat B0_V = NULL; 1030 /* submatrices of local coarse problem */ 1031 Mat S_VV,S_CV,S_VC,S_CC; 1032 /* working matrices */ 1033 Mat C_CR; 1034 /* additional working stuff */ 1035 PC pc_R; 1036 Mat F; 1037 PetscBool isLU,isCHOL,isILU; 1038 1039 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1040 PetscScalar *work; 1041 PetscInt *idx_V_B; 1042 PetscInt n,n_vertices,n_constraints,*p0_lidx_I; 1043 PetscInt i,n_R,n_D,n_B; 1044 PetscBool unsymmetric_check; 1045 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 1046 MatType impMatType; 1047 /* some shortcuts to scalars */ 1048 PetscScalar one=1.0,m_one=-1.0; 1049 1050 PetscFunctionBegin; 1051 n_vertices = pcbddc->n_vertices; 1052 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1053 /* Set Non-overlapping dimensions */ 1054 n_B = pcis->n_B; 1055 n_D = pcis->n - n_B; 1056 n_R = pcis->n - n_vertices; 1057 1058 /* Set types for local objects needed by BDDC precondtioner */ 1059 impMatType = MATSEQDENSE; 1060 1061 /* vertices in boundary numbering */ 1062 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1063 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1064 if (i != n_vertices) { 1065 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1066 } 1067 1068 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1069 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1070 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1071 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1072 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1073 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1074 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1075 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1076 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1077 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1078 1079 unsymmetric_check = PETSC_FALSE; 1080 /* allocate workspace */ 1081 n = 0; 1082 if (n_constraints) { 1083 n += n_R*n_constraints; 1084 } 1085 if (n_vertices) { 1086 n = PetscMax(2*n_R*n_vertices,n); 1087 n = PetscMax((n_R+n_B)*n_vertices,n); 1088 } 1089 if (!pcbddc->symmetric_primal) { 1090 n = PetscMax(2*n_R*pcbddc->local_primal_size,n); 1091 unsymmetric_check = PETSC_TRUE; 1092 } 1093 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1094 1095 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1096 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1097 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1098 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1099 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1100 if (isLU || isILU || isCHOL) { 1101 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1102 } else if (sub_schurs->reuse_mumps) { 1103 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1104 MatFactorType type; 1105 1106 F = reuse_mumps->F; 1107 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1108 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1109 } else { 1110 F = NULL; 1111 } 1112 1113 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1114 if (n_constraints) { 1115 Mat M1,M2,M3; 1116 Mat auxmat; 1117 IS is_aux; 1118 PetscScalar *array,*array2; 1119 1120 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1121 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1122 1123 /* Extract constraints on R nodes: C_{CR} */ 1124 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1125 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1126 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1127 1128 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1129 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1130 ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1131 for (i=0;i<n_constraints;i++) { 1132 const PetscScalar *row_cmat_values; 1133 const PetscInt *row_cmat_indices; 1134 PetscInt size_of_constraint,j; 1135 1136 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1137 for (j=0;j<size_of_constraint;j++) { 1138 work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j]; 1139 } 1140 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1141 } 1142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1143 if (F) { 1144 Mat B; 1145 1146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1147 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1148 ierr = MatDestroy(&B);CHKERRQ(ierr); 1149 } else { 1150 PetscScalar *marr; 1151 1152 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1153 for (i=0;i<n_constraints;i++) { 1154 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1155 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr); 1156 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1157 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1158 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1159 } 1160 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1161 } 1162 if (!pcbddc->switch_static) { 1163 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1164 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1165 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1166 for (i=0;i<n_constraints;i++) { 1167 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr); 1168 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1169 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1170 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1171 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1172 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1173 } 1174 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1175 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1176 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1177 } else { 1178 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1179 pcbddc->local_auxmat2 = local_auxmat2_R; 1180 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1181 } 1182 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1183 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1184 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1185 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1186 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1187 if (isCHOL) { 1188 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1189 } else { 1190 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1191 } 1192 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1193 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1194 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1195 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1196 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1197 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1198 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1199 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1200 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1201 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1202 } 1203 /* Get submatrices from subdomain matrix */ 1204 if (pcbddc->benign_n) { 1205 IS dummy; 1206 Mat B0_R; 1207 PetscReal norm; 1208 1209 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1210 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1211 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1212 if (norm > PETSC_SMALL) { 1213 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1214 } 1215 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1216 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1217 } 1218 1219 if (n_vertices) { 1220 IS is_aux; 1221 1222 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1223 IS tis; 1224 1225 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1226 ierr = ISSort(tis);CHKERRQ(ierr); 1227 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1228 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1229 } else { 1230 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1231 } 1232 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1233 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1234 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1235 if (pcbddc->benign_n) { 1236 IS dummy; 1237 1238 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1239 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1240 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1241 } 1242 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1243 } 1244 1245 /* Matrix of coarse basis functions (local) */ 1246 if (pcbddc->coarse_phi_B) { 1247 PetscInt on_B,on_primal,on_D=n_D; 1248 if (pcbddc->coarse_phi_D) { 1249 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1250 } 1251 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1252 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1253 PetscScalar *marray; 1254 1255 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1256 ierr = PetscFree(marray);CHKERRQ(ierr); 1257 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1258 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1259 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1260 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1261 } 1262 } 1263 1264 if (!pcbddc->coarse_phi_B) { 1265 PetscScalar *marray; 1266 1267 n = n_B*pcbddc->local_primal_size; 1268 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1269 n += n_D*pcbddc->local_primal_size; 1270 } 1271 if (!pcbddc->symmetric_primal) { 1272 n *= 2; 1273 } 1274 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1275 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1276 n = n_B*pcbddc->local_primal_size; 1277 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1278 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1279 n += n_D*pcbddc->local_primal_size; 1280 } 1281 if (!pcbddc->symmetric_primal) { 1282 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1283 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1284 n = n_B*pcbddc->local_primal_size; 1285 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1286 } 1287 } else { 1288 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1289 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1290 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1291 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1292 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1293 } 1294 } 1295 } 1296 1297 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1298 p0_lidx_I = NULL; 1299 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1300 const PetscInt *idxs; 1301 1302 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1303 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1304 for (i=0;i<pcbddc->benign_n;i++) { 1305 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1306 } 1307 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1308 } 1309 1310 /* vertices */ 1311 if (n_vertices) { 1312 1313 ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1314 1315 if (n_R) { 1316 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1317 PetscBLASInt B_N,B_one = 1; 1318 PetscScalar *x,*y; 1319 PetscBool isseqaij; 1320 1321 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1322 ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1323 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1324 if (F) { /* TODO could be optimized for symmetric problems */ 1325 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1326 } else { 1327 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1328 for (i=0;i<n_vertices;i++) { 1329 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr); 1330 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1331 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1332 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1333 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1334 } 1335 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1336 } 1337 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1338 /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */ 1339 if (n_constraints) { 1340 Mat B; 1341 1342 ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1343 for (i=0;i<n_vertices;i++) { 1344 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1345 ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr); 1346 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1347 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1348 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1349 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1350 } 1351 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1352 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1353 ierr = MatDestroy(&B);CHKERRQ(ierr); 1354 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr); 1355 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1356 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1357 ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr); 1358 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one)); 1359 ierr = MatDestroy(&B);CHKERRQ(ierr); 1360 } 1361 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1362 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1363 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1364 } 1365 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1366 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1367 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1368 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1369 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1370 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1371 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1372 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1373 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1374 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1377 } 1378 for (i=0;i<pcbddc->benign_n;i++) { 1379 const PetscScalar *vals; 1380 const PetscInt *idxs; 1381 PetscInt n,j,primal_idx; 1382 1383 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1384 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1385 for (j=0;j<n;j++) { 1386 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1387 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1388 } 1389 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1390 } 1391 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1392 1393 /* coarse basis functions */ 1394 for (i=0;i<n_vertices;i++) { 1395 PetscScalar *y; 1396 1397 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1398 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1399 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1400 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1401 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1402 y[n_B*i+idx_V_B[i]] = 1.0; 1403 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1404 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1405 1406 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1407 PetscInt j; 1408 1409 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1410 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1411 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1412 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1413 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1414 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1415 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1416 } 1417 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1418 } 1419 /* if n_R == 0 the object is not destroyed */ 1420 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1421 } 1422 1423 if (n_constraints) { 1424 Mat B; 1425 1426 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 1427 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1428 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1429 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1430 if (n_vertices) { 1431 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1432 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1433 } else { 1434 Mat S_VCt; 1435 1436 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1437 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1438 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1439 } 1440 } 1441 ierr = MatDestroy(&B);CHKERRQ(ierr); 1442 /* coarse basis functions */ 1443 for (i=0;i<n_constraints;i++) { 1444 PetscScalar *y; 1445 1446 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr); 1447 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1448 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1449 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1450 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1451 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1452 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1453 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1454 PetscInt j; 1455 1456 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1457 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1458 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1459 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1460 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1461 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1462 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1463 } 1464 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1465 } 1466 } 1467 if (n_constraints) { 1468 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1469 } 1470 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1471 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1472 1473 /* compute other basis functions for non-symmetric problems */ 1474 if (!pcbddc->symmetric_primal) { 1475 1476 if (n_constraints) { 1477 Mat S_CCT,B_C; 1478 1479 /* this is a lazy thing */ 1480 ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr); 1481 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr); 1482 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1483 ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1484 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1485 if (n_vertices) { 1486 Mat B_V,S_VCT; 1487 1488 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr); 1489 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1490 ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1491 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1492 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1493 } 1494 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1495 } else { /* if there are no constraints, reset work */ 1496 ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr); 1497 } 1498 if (n_vertices && n_R) { 1499 Mat A_VRT; 1500 PetscScalar *marray; 1501 PetscBLASInt B_N,B_one = 1; 1502 1503 ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr); 1504 ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr); 1505 ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr); 1506 ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr); 1507 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one)); 1508 ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr); 1509 ierr = MatDestroy(&A_VRT);CHKERRQ(ierr); 1510 } 1511 1512 if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1513 for (i=0;i<pcbddc->local_primal_size;i++) { 1514 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1515 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1516 ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1517 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1518 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1519 } 1520 } else { 1521 for (i=0;i<pcbddc->local_primal_size;i++) { 1522 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1523 ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr); 1524 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1525 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1526 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1527 } 1528 } 1529 /* coarse basis functions */ 1530 for (i=0;i<pcbddc->local_primal_size;i++) { 1531 PetscScalar *y; 1532 1533 ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr); 1534 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1535 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1536 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1537 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1538 if (i<n_vertices) { 1539 y[n_B*i+idx_V_B[i]] = 1.0; 1540 } 1541 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1542 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1543 1544 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1545 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1546 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1547 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1550 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1551 } 1552 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1553 } 1554 } 1555 /* free memory */ 1556 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1557 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1558 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1559 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1560 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1561 ierr = PetscFree(work);CHKERRQ(ierr); 1562 if (n_vertices) { 1563 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1564 } 1565 if (n_constraints) { 1566 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1567 } 1568 /* Checking coarse_sub_mat and coarse basis functios */ 1569 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1570 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1571 if (pcbddc->dbg_flag) { 1572 Mat coarse_sub_mat; 1573 Mat AUXMAT,TM1,TM2,TM3,TM4; 1574 Mat coarse_phi_D,coarse_phi_B; 1575 Mat coarse_psi_D,coarse_psi_B; 1576 Mat A_II,A_BB,A_IB,A_BI; 1577 Mat C_B,CPHI; 1578 IS is_dummy; 1579 Vec mones; 1580 MatType checkmattype=MATSEQAIJ; 1581 PetscReal real_value; 1582 1583 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1584 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1585 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1586 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1587 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1588 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1589 if (unsymmetric_check) { 1590 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1591 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1592 } 1593 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1594 1595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1596 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1597 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1598 if (unsymmetric_check) { 1599 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1600 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1601 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1602 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1603 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1604 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1605 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1606 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1607 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1608 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1609 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1610 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1611 } else { 1612 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1613 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1614 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1615 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1616 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1617 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1618 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1619 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1620 } 1621 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1622 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1623 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1624 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1625 if (pcbddc->benign_n) { 1626 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1627 PetscScalar *data,*data2; 1628 PetscInt j; 1629 1630 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1631 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1632 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1633 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1634 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1635 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1636 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1637 for (j=0;j<pcbddc->benign_n;j++) { 1638 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1639 for (i=0;i<pcbddc->local_primal_size;i++) { 1640 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1641 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1642 } 1643 } 1644 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1645 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1646 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1647 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1648 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1649 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1650 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1651 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1652 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1653 } 1654 #if 0 1655 { 1656 PetscViewer viewer; 1657 char filename[256]; 1658 sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank); 1659 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1660 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1661 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1662 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1663 } 1664 #endif 1665 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1666 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1667 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1668 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1669 1670 /* check constraints */ 1671 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1672 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1673 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1674 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1675 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1676 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1677 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1678 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1679 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1680 if (unsymmetric_check) { 1681 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1682 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1683 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1684 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1686 } 1687 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1688 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1689 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1690 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1691 } 1692 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1693 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1694 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1695 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1696 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1697 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1698 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1699 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1700 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1701 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1702 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1703 if (unsymmetric_check) { 1704 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1705 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1706 } 1707 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1708 } 1709 /* get back data */ 1710 *coarse_submat_vals_n = coarse_submat_vals; 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1716 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1717 { 1718 Mat *work_mat; 1719 IS isrow_s,iscol_s; 1720 PetscBool rsorted,csorted; 1721 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1726 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1727 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1728 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1729 1730 if (!rsorted) { 1731 const PetscInt *idxs; 1732 PetscInt *idxs_sorted,i; 1733 1734 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1735 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1736 for (i=0;i<rsize;i++) { 1737 idxs_perm_r[i] = i; 1738 } 1739 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1740 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1741 for (i=0;i<rsize;i++) { 1742 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1743 } 1744 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1745 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1746 } else { 1747 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1748 isrow_s = isrow; 1749 } 1750 1751 if (!csorted) { 1752 if (isrow == iscol) { 1753 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1754 iscol_s = isrow_s; 1755 } else { 1756 const PetscInt *idxs; 1757 PetscInt *idxs_sorted,i; 1758 1759 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1760 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1761 for (i=0;i<csize;i++) { 1762 idxs_perm_c[i] = i; 1763 } 1764 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1765 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1766 for (i=0;i<csize;i++) { 1767 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1768 } 1769 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1770 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1771 } 1772 } else { 1773 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1774 iscol_s = iscol; 1775 } 1776 1777 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1778 1779 if (!rsorted || !csorted) { 1780 Mat new_mat; 1781 IS is_perm_r,is_perm_c; 1782 1783 if (!rsorted) { 1784 PetscInt *idxs_r,i; 1785 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1786 for (i=0;i<rsize;i++) { 1787 idxs_r[idxs_perm_r[i]] = i; 1788 } 1789 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1790 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1791 } else { 1792 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1793 } 1794 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1795 1796 if (!csorted) { 1797 if (isrow_s == iscol_s) { 1798 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1799 is_perm_c = is_perm_r; 1800 } else { 1801 PetscInt *idxs_c,i; 1802 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1803 for (i=0;i<csize;i++) { 1804 idxs_c[idxs_perm_c[i]] = i; 1805 } 1806 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1807 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1808 } 1809 } else { 1810 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1811 } 1812 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1813 1814 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1815 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1816 work_mat[0] = new_mat; 1817 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1818 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1819 } 1820 1821 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1822 *B = work_mat[0]; 1823 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1824 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1825 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1831 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1832 { 1833 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1834 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1835 Mat new_mat; 1836 IS is_local,is_global; 1837 PetscInt local_size; 1838 PetscBool isseqaij; 1839 PetscErrorCode ierr; 1840 1841 PetscFunctionBegin; 1842 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1843 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1844 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1845 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1846 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1847 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1848 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1849 1850 /* check */ 1851 if (pcbddc->dbg_flag) { 1852 Vec x,x_change; 1853 PetscReal error; 1854 1855 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1856 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1857 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1858 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1859 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1860 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1861 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1862 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1863 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1864 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1867 ierr = VecDestroy(&x);CHKERRQ(ierr); 1868 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1869 } 1870 1871 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1872 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1873 if (isseqaij) { 1874 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1875 } else { 1876 Mat work_mat; 1877 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1878 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 1879 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 1880 } 1881 if (matis->A->symmetric_set) { 1882 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 1883 #if !defined(PETSC_USE_COMPLEX) 1884 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 1885 #endif 1886 } 1887 /* 1888 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1889 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 1890 */ 1891 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 1892 PetscFunctionReturn(0); 1893 } 1894 1895 #undef __FUNCT__ 1896 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 1897 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 1898 { 1899 PC_IS* pcis = (PC_IS*)(pc->data); 1900 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1901 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1902 PetscInt *idx_R_local=NULL; 1903 PetscInt n_vertices,i,j,n_R,n_D,n_B; 1904 PetscInt vbs,bs; 1905 PetscBT bitmask=NULL; 1906 PetscErrorCode ierr; 1907 1908 PetscFunctionBegin; 1909 /* 1910 No need to setup local scatters if 1911 - primal space is unchanged 1912 AND 1913 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 1914 AND 1915 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 1916 */ 1917 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 1918 PetscFunctionReturn(0); 1919 } 1920 /* destroy old objects */ 1921 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1922 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1923 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1924 /* Set Non-overlapping dimensions */ 1925 n_B = pcis->n_B; 1926 n_D = pcis->n - n_B; 1927 n_vertices = pcbddc->n_vertices; 1928 1929 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 1930 1931 /* create auxiliary bitmask and allocate workspace */ 1932 if (!sub_schurs->reuse_mumps) { 1933 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 1934 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 1935 for (i=0;i<n_vertices;i++) { 1936 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 1937 } 1938 1939 for (i=0, n_R=0; i<pcis->n; i++) { 1940 if (!PetscBTLookup(bitmask,i)) { 1941 idx_R_local[n_R++] = i; 1942 } 1943 } 1944 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 1945 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1946 1947 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1948 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 1949 } 1950 1951 /* Block code */ 1952 vbs = 1; 1953 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 1954 if (bs>1 && !(n_vertices%bs)) { 1955 PetscBool is_blocked = PETSC_TRUE; 1956 PetscInt *vary; 1957 if (!sub_schurs->reuse_mumps) { 1958 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 1959 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 1960 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 1961 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 1962 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 1963 for (i=0; i<pcis->n/bs; i++) { 1964 if (vary[i]!=0 && vary[i]!=bs) { 1965 is_blocked = PETSC_FALSE; 1966 break; 1967 } 1968 } 1969 ierr = PetscFree(vary);CHKERRQ(ierr); 1970 } else { 1971 /* Verify directly the R set */ 1972 for (i=0; i<n_R/bs; i++) { 1973 PetscInt j,node=idx_R_local[bs*i]; 1974 for (j=1; j<bs; j++) { 1975 if (node != idx_R_local[bs*i+j]-j) { 1976 is_blocked = PETSC_FALSE; 1977 break; 1978 } 1979 } 1980 } 1981 } 1982 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 1983 vbs = bs; 1984 for (i=0;i<n_R/vbs;i++) { 1985 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 1986 } 1987 } 1988 } 1989 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 1990 if (sub_schurs->reuse_mumps) { 1991 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1992 1993 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 1994 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 1995 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 1996 reuse_mumps->is_R = pcbddc->is_R_local; 1997 } else { 1998 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 1999 } 2000 2001 /* print some info if requested */ 2002 if (pcbddc->dbg_flag) { 2003 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2004 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2005 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2006 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2007 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2008 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2009 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2010 } 2011 2012 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2013 if (!sub_schurs->reuse_mumps) { 2014 IS is_aux1,is_aux2; 2015 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2016 2017 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2018 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2019 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2020 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2021 for (i=0; i<n_D; i++) { 2022 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2023 } 2024 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2025 for (i=0, j=0; i<n_R; i++) { 2026 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2027 aux_array1[j++] = i; 2028 } 2029 } 2030 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2031 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2032 for (i=0, j=0; i<n_B; i++) { 2033 if (!PetscBTLookup(bitmask,is_indices[i])) { 2034 aux_array2[j++] = i; 2035 } 2036 } 2037 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2038 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2039 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2040 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2041 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2042 2043 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2044 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2045 for (i=0, j=0; i<n_R; i++) { 2046 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2047 aux_array1[j++] = i; 2048 } 2049 } 2050 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2051 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2052 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2053 } 2054 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2055 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2056 } else { 2057 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2058 IS tis; 2059 PetscInt schur_size; 2060 2061 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2062 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2063 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2064 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2065 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2066 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2067 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2068 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2069 } 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2077 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2078 { 2079 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2080 PC_IS *pcis = (PC_IS*)pc->data; 2081 PC pc_temp; 2082 Mat A_RR; 2083 MatReuse reuse; 2084 PetscScalar m_one = -1.0; 2085 PetscReal value; 2086 PetscInt n_D,n_R; 2087 PetscBool use_exact,use_exact_reduced,issbaij; 2088 PetscErrorCode ierr; 2089 /* prefixes stuff */ 2090 char dir_prefix[256],neu_prefix[256],str_level[16]; 2091 size_t len; 2092 2093 PetscFunctionBegin; 2094 2095 /* compute prefixes */ 2096 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2097 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2098 if (!pcbddc->current_level) { 2099 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2100 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2101 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2102 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2103 } else { 2104 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2105 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2106 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2107 len -= 15; /* remove "pc_bddc_coarse_" */ 2108 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2109 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2110 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2111 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2112 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2113 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2114 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2115 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2116 } 2117 2118 /* DIRICHLET PROBLEM */ 2119 if (dirichlet) { 2120 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2121 if (pcbddc->local_mat->symmetric_set) { 2122 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2123 } 2124 /* Matrix for Dirichlet problem is pcis->A_II */ 2125 n_D = pcis->n - pcis->n_B; 2126 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2127 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2128 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2129 /* default */ 2130 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2131 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2132 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2133 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2134 if (issbaij) { 2135 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2136 } else { 2137 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2138 } 2139 /* Allow user's customization */ 2140 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2141 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2142 } 2143 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2144 if (sub_schurs->reuse_mumps) { 2145 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2146 2147 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2148 } 2149 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2150 if (!n_D) { 2151 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2152 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2153 } 2154 /* Set Up KSP for Dirichlet problem of BDDC */ 2155 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2156 /* set ksp_D into pcis data */ 2157 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2158 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2159 pcis->ksp_D = pcbddc->ksp_D; 2160 } 2161 2162 /* NEUMANN PROBLEM */ 2163 A_RR = 0; 2164 if (neumann) { 2165 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2166 PetscInt ibs,mbs; 2167 PetscBool issbaij; 2168 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2169 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2170 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2171 if (pcbddc->ksp_R) { /* already created ksp */ 2172 PetscInt nn_R; 2173 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2174 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2175 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2176 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2177 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2178 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2179 reuse = MAT_INITIAL_MATRIX; 2180 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2181 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2182 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2183 reuse = MAT_INITIAL_MATRIX; 2184 } else { /* safe to reuse the matrix */ 2185 reuse = MAT_REUSE_MATRIX; 2186 } 2187 } 2188 /* last check */ 2189 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2190 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2191 reuse = MAT_INITIAL_MATRIX; 2192 } 2193 } else { /* first time, so we need to create the matrix */ 2194 reuse = MAT_INITIAL_MATRIX; 2195 } 2196 /* extract A_RR */ 2197 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2198 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2199 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2200 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2201 if (matis->A == pcbddc->local_mat) { 2202 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2203 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2204 } else { 2205 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2206 } 2207 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2208 if (matis->A == pcbddc->local_mat) { 2209 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2210 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2211 } else { 2212 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2213 } 2214 } 2215 if (!sub_schurs->reuse_mumps) { 2216 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2217 if (pcbddc->local_mat->symmetric_set) { 2218 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2219 } 2220 } else { 2221 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2222 2223 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2224 ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 2225 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2226 } 2227 if (!pcbddc->ksp_R) { /* create object if not present */ 2228 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2229 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2230 /* default */ 2231 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2232 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2233 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2234 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2235 if (issbaij) { 2236 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2237 } else { 2238 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2239 } 2240 /* Allow user's customization */ 2241 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2242 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2243 } 2244 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2245 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2246 if (!n_R) { 2247 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2248 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2249 } 2250 /* Reuse MUMPS solver if it is present */ 2251 if (sub_schurs->reuse_mumps) { 2252 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2253 2254 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2255 } 2256 /* Set Up KSP for Neumann problem of BDDC */ 2257 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2258 } 2259 /* free Neumann problem's matrix */ 2260 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2261 2262 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2263 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2264 if (pcbddc->dbg_flag) { 2265 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2266 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2267 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2268 } 2269 if (dirichlet) { /* Dirichlet */ 2270 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2271 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2272 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2273 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2274 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2275 /* need to be adapted? */ 2276 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2277 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2278 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2279 /* print info */ 2280 if (pcbddc->dbg_flag) { 2281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2282 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2283 } 2284 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2285 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2286 } 2287 } 2288 if (neumann) { /* Neumann */ 2289 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2290 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2291 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2292 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2293 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2294 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2295 /* need to be adapted? */ 2296 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2297 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2298 /* print info */ 2299 if (pcbddc->dbg_flag) { 2300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2301 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2302 } 2303 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2304 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2305 } 2306 } 2307 } 2308 PetscFunctionReturn(0); 2309 } 2310 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2313 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2314 { 2315 PetscErrorCode ierr; 2316 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2317 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2318 2319 PetscFunctionBegin; 2320 if (!sub_schurs->reuse_mumps) { 2321 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2322 } 2323 if (!pcbddc->switch_static) { 2324 if (applytranspose && pcbddc->local_auxmat1) { 2325 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2326 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2327 } 2328 if (!sub_schurs->reuse_mumps) { 2329 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2330 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2331 } else { 2332 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2333 2334 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2335 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2336 } 2337 } else { 2338 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2339 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2340 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2341 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2342 if (applytranspose && pcbddc->local_auxmat1) { 2343 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2344 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2345 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2346 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2347 } 2348 } 2349 if (!sub_schurs->reuse_mumps) { 2350 if (applytranspose) { 2351 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2352 } else { 2353 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2354 } 2355 #if defined(PETSC_HAVE_MUMPS) 2356 } else { 2357 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2358 2359 if (applytranspose) { 2360 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2361 } else { 2362 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2363 } 2364 #endif 2365 } 2366 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2367 if (!pcbddc->switch_static) { 2368 if (!sub_schurs->reuse_mumps) { 2369 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2370 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2371 } else { 2372 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2373 2374 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2375 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2376 } 2377 if (!applytranspose && pcbddc->local_auxmat1) { 2378 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2379 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2380 } 2381 } else { 2382 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2383 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2384 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2385 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2386 if (!applytranspose && pcbddc->local_auxmat1) { 2387 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2388 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2389 } 2390 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2391 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2392 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2393 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2394 } 2395 PetscFunctionReturn(0); 2396 } 2397 2398 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2399 #undef __FUNCT__ 2400 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2401 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2402 { 2403 PetscErrorCode ierr; 2404 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2405 PC_IS* pcis = (PC_IS*) (pc->data); 2406 const PetscScalar zero = 0.0; 2407 2408 PetscFunctionBegin; 2409 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2410 if (applytranspose) { 2411 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2412 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2413 } else { 2414 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2415 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2416 } 2417 2418 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2419 if (pcbddc->benign_n) { 2420 PetscScalar *array; 2421 PetscInt j; 2422 2423 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2424 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2425 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2426 } 2427 2428 /* start communications from local primal nodes to rhs of coarse solver */ 2429 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2430 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2431 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2432 2433 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2434 /* TODO remove null space when doing multilevel */ 2435 if (pcbddc->coarse_ksp) { 2436 Mat coarse_mat; 2437 Vec rhs,sol; 2438 MatNullSpace nullsp; 2439 2440 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2441 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2442 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2443 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2444 if (nullsp) { 2445 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2446 } 2447 if (applytranspose) { 2448 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2449 } else { 2450 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2451 } 2452 if (nullsp) { 2453 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2454 } 2455 } 2456 2457 /* Local solution on R nodes */ 2458 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2459 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2460 } 2461 2462 /* communications from coarse sol to local primal nodes */ 2463 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2464 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2465 2466 /* Sum contributions from two levels */ 2467 if (applytranspose) { 2468 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2469 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2470 } else { 2471 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2472 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2473 } 2474 /* store p0 */ 2475 if (pcbddc->benign_n) { 2476 PetscScalar *array; 2477 PetscInt j; 2478 2479 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2480 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2481 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2482 } 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2488 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2489 { 2490 PetscErrorCode ierr; 2491 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2492 PetscScalar *array; 2493 Vec from,to; 2494 2495 PetscFunctionBegin; 2496 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2497 from = pcbddc->coarse_vec; 2498 to = pcbddc->vec1_P; 2499 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2500 Vec tvec; 2501 2502 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2503 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2504 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2505 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2506 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2507 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2508 } 2509 } else { /* from local to global -> put data in coarse right hand side */ 2510 from = pcbddc->vec1_P; 2511 to = pcbddc->coarse_vec; 2512 } 2513 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2519 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2520 { 2521 PetscErrorCode ierr; 2522 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2523 PetscScalar *array; 2524 Vec from,to; 2525 2526 PetscFunctionBegin; 2527 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2528 from = pcbddc->coarse_vec; 2529 to = pcbddc->vec1_P; 2530 } else { /* from local to global -> put data in coarse right hand side */ 2531 from = pcbddc->vec1_P; 2532 to = pcbddc->coarse_vec; 2533 } 2534 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2535 if (smode == SCATTER_FORWARD) { 2536 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2537 Vec tvec; 2538 2539 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2540 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2541 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2542 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2543 } 2544 } else { 2545 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2546 ierr = VecResetArray(from);CHKERRQ(ierr); 2547 } 2548 } 2549 PetscFunctionReturn(0); 2550 } 2551 2552 /* uncomment for testing purposes */ 2553 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2556 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2557 { 2558 PetscErrorCode ierr; 2559 PC_IS* pcis = (PC_IS*)(pc->data); 2560 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2561 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2562 /* one and zero */ 2563 PetscScalar one=1.0,zero=0.0; 2564 /* space to store constraints and their local indices */ 2565 PetscScalar *constraints_data; 2566 PetscInt *constraints_idxs,*constraints_idxs_B; 2567 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2568 PetscInt *constraints_n; 2569 /* iterators */ 2570 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2571 /* BLAS integers */ 2572 PetscBLASInt lwork,lierr; 2573 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2574 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2575 /* reuse */ 2576 PetscInt olocal_primal_size,olocal_primal_size_cc; 2577 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2578 /* change of basis */ 2579 PetscBool qr_needed; 2580 PetscBT change_basis,qr_needed_idx; 2581 /* auxiliary stuff */ 2582 PetscInt *nnz,*is_indices; 2583 PetscInt ncc; 2584 /* some quantities */ 2585 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2586 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2587 2588 PetscFunctionBegin; 2589 /* Destroy Mat objects computed previously */ 2590 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2591 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2592 /* save info on constraints from previous setup (if any) */ 2593 olocal_primal_size = pcbddc->local_primal_size; 2594 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2595 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2596 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2597 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2598 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2599 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2600 2601 /* print some info */ 2602 if (pcbddc->dbg_flag) { 2603 IS vertices; 2604 PetscInt nv,nedges,nfaces; 2605 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2606 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2607 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2608 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2611 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2613 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2614 } 2615 2616 if (!pcbddc->adaptive_selection) { 2617 IS ISForVertices,*ISForFaces,*ISForEdges; 2618 MatNullSpace nearnullsp; 2619 const Vec *nearnullvecs; 2620 Vec *localnearnullsp; 2621 PetscScalar *array; 2622 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2623 PetscBool nnsp_has_cnst; 2624 /* LAPACK working arrays for SVD or POD */ 2625 PetscBool skip_lapack,boolforchange; 2626 PetscScalar *work; 2627 PetscReal *singular_vals; 2628 #if defined(PETSC_USE_COMPLEX) 2629 PetscReal *rwork; 2630 #endif 2631 #if defined(PETSC_MISSING_LAPACK_GESVD) 2632 PetscScalar *temp_basis,*correlation_mat; 2633 #else 2634 PetscBLASInt dummy_int=1; 2635 PetscScalar dummy_scalar=1.; 2636 #endif 2637 2638 /* Get index sets for faces, edges and vertices from graph */ 2639 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2640 /* free unneeded index sets */ 2641 if (!pcbddc->use_vertices) { 2642 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2643 } 2644 if (!pcbddc->use_edges) { 2645 for (i=0;i<n_ISForEdges;i++) { 2646 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2647 } 2648 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2649 n_ISForEdges = 0; 2650 } 2651 if (!pcbddc->use_faces) { 2652 for (i=0;i<n_ISForFaces;i++) { 2653 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2654 } 2655 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2656 n_ISForFaces = 0; 2657 } 2658 2659 #if defined(PETSC_USE_DEBUG) 2660 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2661 Also use_change_of_basis should be consistent among processors */ 2662 if (pcbddc->NullSpace) { 2663 PetscBool tbool[2],gbool[2]; 2664 2665 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2666 pcbddc->use_change_of_basis = PETSC_TRUE; 2667 if (!ISForEdges) { 2668 pcbddc->use_change_on_faces = PETSC_TRUE; 2669 } 2670 } 2671 tbool[0] = pcbddc->use_change_of_basis; 2672 tbool[1] = pcbddc->use_change_on_faces; 2673 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2674 pcbddc->use_change_of_basis = gbool[0]; 2675 pcbddc->use_change_on_faces = gbool[1]; 2676 } 2677 #endif 2678 2679 /* check if near null space is attached to global mat */ 2680 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2681 if (nearnullsp) { 2682 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2683 /* remove any stored info */ 2684 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2685 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2686 /* store information for BDDC solver reuse */ 2687 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2688 pcbddc->onearnullspace = nearnullsp; 2689 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2690 for (i=0;i<nnsp_size;i++) { 2691 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2692 } 2693 } else { /* if near null space is not provided BDDC uses constants by default */ 2694 nnsp_size = 0; 2695 nnsp_has_cnst = PETSC_TRUE; 2696 } 2697 /* get max number of constraints on a single cc */ 2698 max_constraints = nnsp_size; 2699 if (nnsp_has_cnst) max_constraints++; 2700 2701 /* 2702 Evaluate maximum storage size needed by the procedure 2703 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2704 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2705 There can be multiple constraints per connected component 2706 */ 2707 n_vertices = 0; 2708 if (ISForVertices) { 2709 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2710 } 2711 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2712 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2713 2714 total_counts = n_ISForFaces+n_ISForEdges; 2715 total_counts *= max_constraints; 2716 total_counts += n_vertices; 2717 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2718 2719 total_counts = 0; 2720 max_size_of_constraint = 0; 2721 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2722 IS used_is; 2723 if (i<n_ISForEdges) { 2724 used_is = ISForEdges[i]; 2725 } else { 2726 used_is = ISForFaces[i-n_ISForEdges]; 2727 } 2728 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2729 total_counts += j; 2730 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2731 } 2732 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2733 2734 /* get local part of global near null space vectors */ 2735 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2736 for (k=0;k<nnsp_size;k++) { 2737 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2738 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2739 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2740 } 2741 2742 /* whether or not to skip lapack calls */ 2743 skip_lapack = PETSC_TRUE; 2744 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2745 2746 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2747 if (!skip_lapack) { 2748 PetscScalar temp_work; 2749 2750 #if defined(PETSC_MISSING_LAPACK_GESVD) 2751 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2752 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2753 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2754 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2755 #if defined(PETSC_USE_COMPLEX) 2756 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2757 #endif 2758 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2759 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2760 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2761 lwork = -1; 2762 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2763 #if !defined(PETSC_USE_COMPLEX) 2764 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2765 #else 2766 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2767 #endif 2768 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2769 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2770 #else /* on missing GESVD */ 2771 /* SVD */ 2772 PetscInt max_n,min_n; 2773 max_n = max_size_of_constraint; 2774 min_n = max_constraints; 2775 if (max_size_of_constraint < max_constraints) { 2776 min_n = max_size_of_constraint; 2777 max_n = max_constraints; 2778 } 2779 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2780 #if defined(PETSC_USE_COMPLEX) 2781 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2782 #endif 2783 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2784 lwork = -1; 2785 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2786 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2787 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2788 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2789 #if !defined(PETSC_USE_COMPLEX) 2790 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2791 #else 2792 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2793 #endif 2794 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2795 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2796 #endif /* on missing GESVD */ 2797 /* Allocate optimal workspace */ 2798 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2799 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2800 } 2801 /* Now we can loop on constraining sets */ 2802 total_counts = 0; 2803 constraints_idxs_ptr[0] = 0; 2804 constraints_data_ptr[0] = 0; 2805 /* vertices */ 2806 if (n_vertices) { 2807 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2808 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2809 for (i=0;i<n_vertices;i++) { 2810 constraints_n[total_counts] = 1; 2811 constraints_data[total_counts] = 1.0; 2812 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2813 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2814 total_counts++; 2815 } 2816 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2817 n_vertices = total_counts; 2818 } 2819 2820 /* edges and faces */ 2821 total_counts_cc = total_counts; 2822 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2823 IS used_is; 2824 PetscBool idxs_copied = PETSC_FALSE; 2825 2826 if (ncc<n_ISForEdges) { 2827 used_is = ISForEdges[ncc]; 2828 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2829 } else { 2830 used_is = ISForFaces[ncc-n_ISForEdges]; 2831 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2832 } 2833 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2834 2835 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2836 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2837 /* change of basis should not be performed on local periodic nodes */ 2838 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2839 if (nnsp_has_cnst) { 2840 PetscScalar quad_value; 2841 2842 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2843 idxs_copied = PETSC_TRUE; 2844 2845 if (!pcbddc->use_nnsp_true) { 2846 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2847 } else { 2848 quad_value = 1.0; 2849 } 2850 for (j=0;j<size_of_constraint;j++) { 2851 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2852 } 2853 temp_constraints++; 2854 total_counts++; 2855 } 2856 for (k=0;k<nnsp_size;k++) { 2857 PetscReal real_value; 2858 PetscScalar *ptr_to_data; 2859 2860 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2861 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2862 for (j=0;j<size_of_constraint;j++) { 2863 ptr_to_data[j] = array[is_indices[j]]; 2864 } 2865 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2866 /* check if array is null on the connected component */ 2867 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2868 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2869 if (real_value > 0.0) { /* keep indices and values */ 2870 temp_constraints++; 2871 total_counts++; 2872 if (!idxs_copied) { 2873 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2874 idxs_copied = PETSC_TRUE; 2875 } 2876 } 2877 } 2878 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2879 valid_constraints = temp_constraints; 2880 if (!pcbddc->use_nnsp_true && temp_constraints) { 2881 if (temp_constraints == 1) { /* just normalize the constraint */ 2882 PetscScalar norm,*ptr_to_data; 2883 2884 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2885 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2886 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 2887 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 2888 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 2889 } else { /* perform SVD */ 2890 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 2891 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 2892 2893 #if defined(PETSC_MISSING_LAPACK_GESVD) 2894 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 2895 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 2896 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 2897 the constraints basis will differ (by a complex factor with absolute value equal to 1) 2898 from that computed using LAPACKgesvd 2899 -> This is due to a different computation of eigenvectors in LAPACKheev 2900 -> The quality of the POD-computed basis will be the same */ 2901 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 2902 /* Store upper triangular part of correlation matrix */ 2903 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2904 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2905 for (j=0;j<temp_constraints;j++) { 2906 for (k=0;k<j+1;k++) { 2907 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 2908 } 2909 } 2910 /* compute eigenvalues and eigenvectors of correlation matrix */ 2911 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 2913 #if !defined(PETSC_USE_COMPLEX) 2914 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 2915 #else 2916 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 2917 #endif 2918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2919 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 2920 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 2921 j = 0; 2922 while (j < temp_constraints && singular_vals[j] < tol) j++; 2923 total_counts = total_counts-j; 2924 valid_constraints = temp_constraints-j; 2925 /* scale and copy POD basis into used quadrature memory */ 2926 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2927 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2928 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 2929 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2930 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 2931 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 2932 if (j<temp_constraints) { 2933 PetscInt ii; 2934 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 2935 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2936 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 2937 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2938 for (k=0;k<temp_constraints-j;k++) { 2939 for (ii=0;ii<size_of_constraint;ii++) { 2940 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 2941 } 2942 } 2943 } 2944 #else /* on missing GESVD */ 2945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 2946 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 2947 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2948 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2949 #if !defined(PETSC_USE_COMPLEX) 2950 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 2951 #else 2952 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 2953 #endif 2954 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 2955 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2956 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 2957 k = temp_constraints; 2958 if (k > size_of_constraint) k = size_of_constraint; 2959 j = 0; 2960 while (j < k && singular_vals[k-j-1] < tol) j++; 2961 valid_constraints = k-j; 2962 total_counts = total_counts-temp_constraints+valid_constraints; 2963 #endif /* on missing GESVD */ 2964 } 2965 } 2966 /* update pointers information */ 2967 if (valid_constraints) { 2968 constraints_n[total_counts_cc] = valid_constraints; 2969 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 2970 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 2971 /* set change_of_basis flag */ 2972 if (boolforchange) { 2973 PetscBTSet(change_basis,total_counts_cc); 2974 } 2975 total_counts_cc++; 2976 } 2977 } 2978 /* free workspace */ 2979 if (!skip_lapack) { 2980 ierr = PetscFree(work);CHKERRQ(ierr); 2981 #if defined(PETSC_USE_COMPLEX) 2982 ierr = PetscFree(rwork);CHKERRQ(ierr); 2983 #endif 2984 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 2985 #if defined(PETSC_MISSING_LAPACK_GESVD) 2986 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 2987 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 2988 #endif 2989 } 2990 for (k=0;k<nnsp_size;k++) { 2991 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 2992 } 2993 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 2994 /* free index sets of faces, edges and vertices */ 2995 for (i=0;i<n_ISForFaces;i++) { 2996 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2997 } 2998 if (n_ISForFaces) { 2999 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3000 } 3001 for (i=0;i<n_ISForEdges;i++) { 3002 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3003 } 3004 if (n_ISForEdges) { 3005 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3006 } 3007 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3008 } else { 3009 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3010 3011 total_counts = 0; 3012 n_vertices = 0; 3013 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3014 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3015 } 3016 max_constraints = 0; 3017 total_counts_cc = 0; 3018 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3019 total_counts += pcbddc->adaptive_constraints_n[i]; 3020 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3021 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3022 } 3023 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3024 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3025 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3026 constraints_data = pcbddc->adaptive_constraints_data; 3027 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3028 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3029 total_counts_cc = 0; 3030 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3031 if (pcbddc->adaptive_constraints_n[i]) { 3032 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3033 } 3034 } 3035 #if 0 3036 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3037 for (i=0;i<total_counts_cc;i++) { 3038 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3039 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3040 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3041 printf(" %d",constraints_idxs[j]); 3042 } 3043 printf("\n"); 3044 printf("number of cc: %d\n",constraints_n[i]); 3045 } 3046 for (i=0;i<n_vertices;i++) { 3047 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3048 } 3049 for (i=0;i<sub_schurs->n_subs;i++) { 3050 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3051 } 3052 #endif 3053 3054 max_size_of_constraint = 0; 3055 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3056 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3057 /* Change of basis */ 3058 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3059 if (pcbddc->use_change_of_basis) { 3060 for (i=0;i<sub_schurs->n_subs;i++) { 3061 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3062 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3063 } 3064 } 3065 } 3066 } 3067 pcbddc->local_primal_size = total_counts; 3068 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3069 3070 /* map constraints_idxs in boundary numbering */ 3071 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3072 if (i != constraints_idxs_ptr[total_counts_cc]) { 3073 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3074 } 3075 3076 /* Create constraint matrix */ 3077 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3078 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3079 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3080 3081 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3082 /* determine if a QR strategy is needed for change of basis */ 3083 qr_needed = PETSC_FALSE; 3084 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3085 total_primal_vertices=0; 3086 pcbddc->local_primal_size_cc = 0; 3087 for (i=0;i<total_counts_cc;i++) { 3088 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3089 if (size_of_constraint == 1) { 3090 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3091 pcbddc->local_primal_size_cc += 1; 3092 } else if (PetscBTLookup(change_basis,i)) { 3093 for (k=0;k<constraints_n[i];k++) { 3094 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3095 } 3096 pcbddc->local_primal_size_cc += constraints_n[i]; 3097 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3098 PetscBTSet(qr_needed_idx,i); 3099 qr_needed = PETSC_TRUE; 3100 } 3101 } else { 3102 pcbddc->local_primal_size_cc += 1; 3103 } 3104 } 3105 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3106 pcbddc->n_vertices = total_primal_vertices; 3107 /* permute indices in order to have a sorted set of vertices */ 3108 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3109 3110 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3111 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3112 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3113 3114 /* nonzero structure of constraint matrix */ 3115 /* and get reference dof for local constraints */ 3116 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3117 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3118 3119 j = total_primal_vertices; 3120 total_counts = total_primal_vertices; 3121 cum = total_primal_vertices; 3122 for (i=n_vertices;i<total_counts_cc;i++) { 3123 if (!PetscBTLookup(change_basis,i)) { 3124 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3125 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3126 cum++; 3127 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3128 for (k=0;k<constraints_n[i];k++) { 3129 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3130 nnz[j+k] = size_of_constraint; 3131 } 3132 j += constraints_n[i]; 3133 } 3134 } 3135 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3136 ierr = PetscFree(nnz);CHKERRQ(ierr); 3137 3138 /* set values in constraint matrix */ 3139 for (i=0;i<total_primal_vertices;i++) { 3140 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3141 } 3142 total_counts = total_primal_vertices; 3143 for (i=n_vertices;i<total_counts_cc;i++) { 3144 if (!PetscBTLookup(change_basis,i)) { 3145 PetscInt *cols; 3146 3147 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3148 cols = constraints_idxs+constraints_idxs_ptr[i]; 3149 for (k=0;k<constraints_n[i];k++) { 3150 PetscInt row = total_counts+k; 3151 PetscScalar *vals; 3152 3153 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3154 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3155 } 3156 total_counts += constraints_n[i]; 3157 } 3158 } 3159 /* assembling */ 3160 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3161 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 3163 /* 3164 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3165 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3166 */ 3167 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3168 if (pcbddc->use_change_of_basis) { 3169 /* dual and primal dofs on a single cc */ 3170 PetscInt dual_dofs,primal_dofs; 3171 /* working stuff for GEQRF */ 3172 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3173 PetscBLASInt lqr_work; 3174 /* working stuff for UNGQR */ 3175 PetscScalar *gqr_work,lgqr_work_t; 3176 PetscBLASInt lgqr_work; 3177 /* working stuff for TRTRS */ 3178 PetscScalar *trs_rhs; 3179 PetscBLASInt Blas_NRHS; 3180 /* pointers for values insertion into change of basis matrix */ 3181 PetscInt *start_rows,*start_cols; 3182 PetscScalar *start_vals; 3183 /* working stuff for values insertion */ 3184 PetscBT is_primal; 3185 PetscInt *aux_primal_numbering_B; 3186 /* matrix sizes */ 3187 PetscInt global_size,local_size; 3188 /* temporary change of basis */ 3189 Mat localChangeOfBasisMatrix; 3190 /* extra space for debugging */ 3191 PetscScalar *dbg_work; 3192 3193 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3194 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3195 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3196 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3197 /* nonzeros for local mat */ 3198 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3199 for (i=0;i<pcis->n;i++) nnz[i]=1; 3200 for (i=n_vertices;i<total_counts_cc;i++) { 3201 if (PetscBTLookup(change_basis,i)) { 3202 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3203 if (PetscBTLookup(qr_needed_idx,i)) { 3204 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3205 } else { 3206 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3207 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3208 } 3209 } 3210 } 3211 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3212 ierr = PetscFree(nnz);CHKERRQ(ierr); 3213 /* Set initial identity in the matrix */ 3214 for (i=0;i<pcis->n;i++) { 3215 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3216 } 3217 3218 if (pcbddc->dbg_flag) { 3219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3221 } 3222 3223 3224 /* Now we loop on the constraints which need a change of basis */ 3225 /* 3226 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3227 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3228 3229 Basic blocks of change of basis matrix T computed by 3230 3231 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3232 3233 | 1 0 ... 0 s_1/S | 3234 | 0 1 ... 0 s_2/S | 3235 | ... | 3236 | 0 ... 1 s_{n-1}/S | 3237 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3238 3239 with S = \sum_{i=1}^n s_i^2 3240 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3241 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3242 3243 - QR decomposition of constraints otherwise 3244 */ 3245 if (qr_needed) { 3246 /* space to store Q */ 3247 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3248 /* first we issue queries for optimal work */ 3249 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3250 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3251 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3252 lqr_work = -1; 3253 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3254 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3255 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3256 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3257 lgqr_work = -1; 3258 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3259 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3260 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3261 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3262 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3263 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3264 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3265 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3266 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3267 /* array to store scaling factors for reflectors */ 3268 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3269 /* array to store rhs and solution of triangular solver */ 3270 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3271 /* allocating workspace for check */ 3272 if (pcbddc->dbg_flag) { 3273 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3274 } 3275 } 3276 /* array to store whether a node is primal or not */ 3277 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3278 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3279 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3280 if (i != total_primal_vertices) { 3281 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3282 } 3283 for (i=0;i<total_primal_vertices;i++) { 3284 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3285 } 3286 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3287 3288 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3289 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3290 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3291 if (PetscBTLookup(change_basis,total_counts)) { 3292 /* get constraint info */ 3293 primal_dofs = constraints_n[total_counts]; 3294 dual_dofs = size_of_constraint-primal_dofs; 3295 3296 if (pcbddc->dbg_flag) { 3297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3298 } 3299 3300 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3301 3302 /* copy quadrature constraints for change of basis check */ 3303 if (pcbddc->dbg_flag) { 3304 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3305 } 3306 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3307 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3308 3309 /* compute QR decomposition of constraints */ 3310 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3311 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3312 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3313 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3314 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3315 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3316 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3317 3318 /* explictly compute R^-T */ 3319 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3320 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3321 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3322 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3323 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3324 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3325 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3326 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3327 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3328 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3329 3330 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3332 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3333 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3335 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3336 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3337 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3338 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3339 3340 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3341 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3342 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3343 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3344 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3345 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3346 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3347 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3348 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3349 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3350 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3351 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3352 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3353 3354 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3355 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3356 /* insert cols for primal dofs */ 3357 for (j=0;j<primal_dofs;j++) { 3358 start_vals = &qr_basis[j*size_of_constraint]; 3359 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3360 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3361 } 3362 /* insert cols for dual dofs */ 3363 for (j=0,k=0;j<dual_dofs;k++) { 3364 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3365 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3366 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3367 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3368 j++; 3369 } 3370 } 3371 3372 /* check change of basis */ 3373 if (pcbddc->dbg_flag) { 3374 PetscInt ii,jj; 3375 PetscBool valid_qr=PETSC_TRUE; 3376 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3377 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3378 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3379 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3380 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3381 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3382 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3383 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3384 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3385 for (jj=0;jj<size_of_constraint;jj++) { 3386 for (ii=0;ii<primal_dofs;ii++) { 3387 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3388 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3389 } 3390 } 3391 if (!valid_qr) { 3392 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3393 for (jj=0;jj<size_of_constraint;jj++) { 3394 for (ii=0;ii<primal_dofs;ii++) { 3395 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3396 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3397 } 3398 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3399 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3400 } 3401 } 3402 } 3403 } else { 3404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3405 } 3406 } 3407 } else { /* simple transformation block */ 3408 PetscInt row,col; 3409 PetscScalar val,norm; 3410 3411 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3412 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3413 for (j=0;j<size_of_constraint;j++) { 3414 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3415 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3416 if (!PetscBTLookup(is_primal,row_B)) { 3417 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3418 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3419 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3420 } else { 3421 for (k=0;k<size_of_constraint;k++) { 3422 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3423 if (row != col) { 3424 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3425 } else { 3426 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3427 } 3428 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3429 } 3430 } 3431 } 3432 if (pcbddc->dbg_flag) { 3433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3434 } 3435 } 3436 } else { 3437 if (pcbddc->dbg_flag) { 3438 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3439 } 3440 } 3441 } 3442 3443 /* free workspace */ 3444 if (qr_needed) { 3445 if (pcbddc->dbg_flag) { 3446 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3447 } 3448 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3449 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3450 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3451 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3452 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3453 } 3454 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3455 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3456 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3457 3458 /* assembling of global change of variable */ 3459 { 3460 Mat tmat; 3461 PetscInt bs; 3462 3463 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3464 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3465 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3466 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3467 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3468 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3469 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3470 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3471 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3472 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3473 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3474 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3475 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3476 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3477 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3478 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3479 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3480 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3481 } 3482 /* check */ 3483 if (pcbddc->dbg_flag) { 3484 PetscReal error; 3485 Vec x,x_change; 3486 3487 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3488 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3489 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3490 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3491 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3492 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3493 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3494 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3495 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3496 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3497 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3498 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3499 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3500 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3501 ierr = VecDestroy(&x);CHKERRQ(ierr); 3502 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3503 } 3504 3505 /* adapt sub_schurs computed (if any) */ 3506 if (pcbddc->use_deluxe_scaling) { 3507 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3508 if (sub_schurs->S_Ej_all) { 3509 Mat S_new,tmat; 3510 ISLocalToGlobalMapping NtoSall; 3511 IS is_all_N,is_V,is_V_Sall; 3512 const PetscScalar *array; 3513 const PetscInt *idxs_V,*idxs_all; 3514 PetscInt i,n_V; 3515 3516 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3517 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3518 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3519 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3520 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3521 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3522 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3523 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3524 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3525 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3526 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3527 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3528 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3529 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3530 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3531 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3532 for (i=0;i<n_V;i++) { 3533 PetscScalar val; 3534 PetscInt idx; 3535 3536 idx = idxs_V[i]; 3537 val = array[idxs_all[idxs_V[i]]]; 3538 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3539 } 3540 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3541 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3542 sub_schurs->S_Ej_all = S_new; 3543 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3544 if (sub_schurs->sum_S_Ej_all) { 3545 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3546 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3547 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3548 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3549 sub_schurs->sum_S_Ej_all = S_new; 3550 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3551 } 3552 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3553 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3554 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3555 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3556 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3557 } 3558 } 3559 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3560 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3561 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3562 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3563 } 3564 3565 /* set up change of basis context */ 3566 if (pcbddc->ChangeOfBasisMatrix) { 3567 PCBDDCChange_ctx change_ctx; 3568 3569 if (!pcbddc->new_global_mat) { 3570 PetscInt global_size,local_size; 3571 3572 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3573 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3574 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3575 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3576 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3577 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3578 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3579 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3580 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3581 } else { 3582 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3583 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3584 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3585 } 3586 if (!pcbddc->user_ChangeOfBasisMatrix) { 3587 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3588 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3589 } else { 3590 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3591 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3592 } 3593 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3594 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3595 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3596 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3597 } 3598 3599 /* add pressure dofs to set of primal nodes for numbering purposes */ 3600 for (i=0;i<pcbddc->benign_n;i++) { 3601 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3602 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3603 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3604 pcbddc->local_primal_size_cc++; 3605 pcbddc->local_primal_size++; 3606 } 3607 3608 /* check if a new primal space has been introduced (also take into account benign trick) */ 3609 pcbddc->new_primal_space_local = PETSC_TRUE; 3610 if (olocal_primal_size == pcbddc->local_primal_size) { 3611 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3612 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3613 if (!pcbddc->new_primal_space_local) { 3614 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3615 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3616 } 3617 } 3618 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3619 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3620 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3621 3622 /* flush dbg viewer */ 3623 if (pcbddc->dbg_flag) { 3624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3625 } 3626 3627 /* free workspace */ 3628 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3629 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3630 if (!pcbddc->adaptive_selection) { 3631 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3632 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3633 } else { 3634 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3635 pcbddc->adaptive_constraints_idxs_ptr, 3636 pcbddc->adaptive_constraints_data_ptr, 3637 pcbddc->adaptive_constraints_idxs, 3638 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3639 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3640 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3641 } 3642 PetscFunctionReturn(0); 3643 } 3644 3645 #undef __FUNCT__ 3646 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3647 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3648 { 3649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3650 PC_IS *pcis = (PC_IS*)pc->data; 3651 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3652 PetscInt ierr,i,vertex_size,N; 3653 PetscViewer viewer=pcbddc->dbg_viewer; 3654 3655 PetscFunctionBegin; 3656 /* Reset previously computed graph */ 3657 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3658 /* Init local Graph struct */ 3659 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3660 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3661 3662 /* Check validity of the csr graph passed in by the user */ 3663 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3664 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3665 } 3666 3667 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3668 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3669 PetscInt *xadj,*adjncy; 3670 PetscInt nvtxs; 3671 PetscBool flg_row=PETSC_FALSE; 3672 3673 if (pcbddc->use_local_adj) { 3674 3675 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3676 if (flg_row) { 3677 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3678 pcbddc->computed_rowadj = PETSC_TRUE; 3679 } 3680 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3681 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3682 IS is_dummy; 3683 ISLocalToGlobalMapping l2gmap_dummy; 3684 PetscInt j,sum; 3685 PetscInt *cxadj,*cadjncy; 3686 const PetscInt *idxs; 3687 PCBDDCGraph graph; 3688 PetscBT is_on_boundary; 3689 3690 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3691 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3692 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3693 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3694 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3695 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3696 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3697 if (flg_row) { 3698 graph->xadj = xadj; 3699 graph->adjncy = adjncy; 3700 } 3701 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3702 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3703 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3704 3705 if (pcbddc->dbg_flag) { 3706 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3707 for (i=0;i<graph->ncc;i++) { 3708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3709 } 3710 } 3711 3712 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3713 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3714 for (i=0;i<pcis->n_B;i++) { 3715 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3716 } 3717 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3718 3719 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3720 sum = 0; 3721 for (i=0;i<graph->ncc;i++) { 3722 PetscInt sizecc = 0; 3723 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3724 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3725 sizecc++; 3726 } 3727 } 3728 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3729 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3730 cxadj[graph->queue[j]] = sizecc; 3731 } 3732 } 3733 sum += sizecc*sizecc; 3734 } 3735 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3736 sum = 0; 3737 for (i=0;i<pcis->n;i++) { 3738 PetscInt temp = cxadj[i]; 3739 cxadj[i] = sum; 3740 sum += temp; 3741 } 3742 cxadj[pcis->n] = sum; 3743 for (i=0;i<graph->ncc;i++) { 3744 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3745 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3746 PetscInt k,sizecc = 0; 3747 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3748 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3749 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3750 sizecc++; 3751 } 3752 } 3753 } 3754 } 3755 } 3756 if (sum) { 3757 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3758 } else { 3759 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3760 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3761 } 3762 graph->xadj = 0; 3763 graph->adjncy = 0; 3764 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3765 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3766 } 3767 } 3768 if (pcbddc->dbg_flag) { 3769 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3770 } 3771 3772 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3773 vertex_size = 1; 3774 if (pcbddc->user_provided_isfordofs) { 3775 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3776 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3777 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3778 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3779 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3780 } 3781 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3782 pcbddc->n_ISForDofs = 0; 3783 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3784 } 3785 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3786 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3787 } else { 3788 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3789 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3790 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3791 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3792 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3793 } 3794 } 3795 } 3796 3797 /* Setup of Graph */ 3798 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3799 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3800 } 3801 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3802 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3803 } 3804 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr); 3805 3806 /* attach info on disconnected subdomains if present */ 3807 if (pcbddc->n_local_subs) { 3808 PetscInt *local_subs; 3809 3810 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3811 for (i=0;i<pcbddc->n_local_subs;i++) { 3812 const PetscInt *idxs; 3813 PetscInt nl,j; 3814 3815 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3816 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3817 for (j=0;j<nl;j++) { 3818 local_subs[idxs[j]] = i; 3819 } 3820 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3821 } 3822 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3823 pcbddc->mat_graph->local_subs = local_subs; 3824 } 3825 3826 /* Graph's connected components analysis */ 3827 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3828 3829 /* print some info to stdout */ 3830 if (pcbddc->dbg_flag) { 3831 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3832 } 3833 3834 /* mark topography has done */ 3835 pcbddc->recompute_topography = PETSC_FALSE; 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3840 #undef __FUNCT__ 3841 #define __FUNCT__ "PCBDDCSubsetNumbering" 3842 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3843 { 3844 PetscSF sf; 3845 PetscLayout map; 3846 const PetscInt *idxs; 3847 PetscInt *leaf_data,*root_data,*gidxs; 3848 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3849 PetscInt n_n,nlocals,start,first_index; 3850 PetscMPIInt commsize; 3851 PetscBool first_found; 3852 PetscErrorCode ierr; 3853 3854 PetscFunctionBegin; 3855 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3856 if (subset_mult) { 3857 PetscCheckSameComm(subset,1,subset_mult,2); 3858 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3859 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3860 } 3861 /* create workspace layout for computing global indices of subset */ 3862 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3863 lbounds[0] = lbounds[1] = 0; 3864 for (i=0;i<n;i++) { 3865 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3866 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3867 } 3868 lbounds[0] = -lbounds[0]; 3869 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3870 gbounds[0] = -gbounds[0]; 3871 N = gbounds[1] - gbounds[0] + 1; 3872 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 3873 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 3874 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 3875 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 3876 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 3877 3878 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 3879 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 3880 if (subset_mult) { 3881 const PetscInt* idxs_mult; 3882 3883 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3884 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 3885 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3886 } else { 3887 for (i=0;i<n;i++) leaf_data[i] = 1; 3888 } 3889 /* local size of new subset */ 3890 n_n = 0; 3891 for (i=0;i<n;i++) n_n += leaf_data[i]; 3892 3893 /* global indexes in layout */ 3894 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 3895 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 3896 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 3897 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 3898 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 3899 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 3900 3901 /* reduce from leaves to roots */ 3902 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 3903 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3904 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 3905 3906 /* count indexes in local part of layout */ 3907 nlocals = 0; 3908 first_index = -1; 3909 first_found = PETSC_FALSE; 3910 for (i=0;i<Nl;i++) { 3911 if (!first_found && root_data[i]) { 3912 first_found = PETSC_TRUE; 3913 first_index = i; 3914 } 3915 nlocals += root_data[i]; 3916 } 3917 3918 /* cumulative of number of indexes and size of subset without holes */ 3919 #if defined(PETSC_HAVE_MPI_EXSCAN) 3920 start = 0; 3921 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3922 #else 3923 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3924 start = start-nlocals; 3925 #endif 3926 3927 if (N_n) { /* compute total size of new subset if requested */ 3928 *N_n = start + nlocals; 3929 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 3930 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3931 } 3932 3933 /* adapt root data with cumulative */ 3934 if (first_found) { 3935 PetscInt old_index; 3936 3937 root_data[first_index] += start; 3938 old_index = first_index; 3939 for (i=first_index+1;i<Nl;i++) { 3940 if (root_data[i]) { 3941 root_data[i] += root_data[old_index]; 3942 old_index = i; 3943 } 3944 } 3945 } 3946 3947 /* from roots to leaves */ 3948 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3949 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 3950 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 3951 3952 /* create new IS with global indexes without holes */ 3953 if (subset_mult) { 3954 const PetscInt* idxs_mult; 3955 PetscInt cum; 3956 3957 cum = 0; 3958 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3959 for (i=0;i<n;i++) { 3960 PetscInt j; 3961 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 3962 } 3963 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 3964 } else { 3965 for (i=0;i<n;i++) { 3966 gidxs[i] = leaf_data[i]-1; 3967 } 3968 } 3969 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 3970 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 3971 PetscFunctionReturn(0); 3972 } 3973 3974 #undef __FUNCT__ 3975 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 3976 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 3977 { 3978 PetscInt i,j; 3979 PetscScalar *alphas; 3980 PetscErrorCode ierr; 3981 3982 PetscFunctionBegin; 3983 /* this implements stabilized Gram-Schmidt */ 3984 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 3985 for (i=0;i<n;i++) { 3986 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 3987 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 3988 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 3989 } 3990 ierr = PetscFree(alphas);CHKERRQ(ierr); 3991 PetscFunctionReturn(0); 3992 } 3993 3994 #undef __FUNCT__ 3995 #define __FUNCT__ "MatISGetSubassemblingPattern" 3996 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 3997 { 3998 IS ranks_send_to; 3999 PetscInt n_neighs,*neighs,*n_shared,**shared; 4000 PetscMPIInt size,rank,color; 4001 PetscInt *xadj,*adjncy; 4002 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4003 PetscInt i,local_size,threshold=0; 4004 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4005 PetscSubcomm subcomm; 4006 PetscErrorCode ierr; 4007 4008 PetscFunctionBegin; 4009 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4010 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4011 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4012 4013 /* Get info on mapping */ 4014 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4015 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4016 4017 /* build local CSR graph of subdomains' connectivity */ 4018 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4019 xadj[0] = 0; 4020 xadj[1] = PetscMax(n_neighs-1,0); 4021 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4022 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4023 4024 if (threshold) { 4025 PetscInt xadj_count = 0; 4026 for (i=1;i<n_neighs;i++) { 4027 if (n_shared[i] > threshold) { 4028 adjncy[xadj_count] = neighs[i]; 4029 adjncy_wgt[xadj_count] = n_shared[i]; 4030 xadj_count++; 4031 } 4032 } 4033 xadj[1] = xadj_count; 4034 } else { 4035 if (xadj[1]) { 4036 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4037 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4038 } 4039 } 4040 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4041 if (use_square) { 4042 for (i=0;i<xadj[1];i++) { 4043 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4044 } 4045 } 4046 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4047 4048 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4049 4050 /* 4051 Restrict work on active processes only. 4052 */ 4053 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4054 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4055 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4056 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4057 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4058 if (color) { 4059 ierr = PetscFree(xadj);CHKERRQ(ierr); 4060 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4061 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4062 } else { 4063 Mat subdomain_adj; 4064 IS new_ranks,new_ranks_contig; 4065 MatPartitioning partitioner; 4066 PetscInt prank,rstart=0,rend=0; 4067 PetscInt *is_indices,*oldranks; 4068 PetscBool aggregate; 4069 4070 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4071 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4072 prank = rank; 4073 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4074 /* 4075 for (i=0;i<size;i++) { 4076 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4077 } 4078 */ 4079 for (i=0;i<xadj[1];i++) { 4080 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4081 } 4082 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4083 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4084 if (aggregate) { 4085 PetscInt lrows,row,ncols,*cols; 4086 PetscMPIInt nrank; 4087 PetscScalar *vals; 4088 4089 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4090 lrows = 0; 4091 if (nrank<redprocs) { 4092 lrows = size/redprocs; 4093 if (nrank<size%redprocs) lrows++; 4094 } 4095 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4096 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4097 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4098 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4099 row = nrank; 4100 ncols = xadj[1]-xadj[0]; 4101 cols = adjncy; 4102 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4103 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4104 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4105 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4106 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4107 ierr = PetscFree(xadj);CHKERRQ(ierr); 4108 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4109 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4110 ierr = PetscFree(vals);CHKERRQ(ierr); 4111 } else { 4112 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4113 } 4114 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4115 4116 /* Partition */ 4117 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4118 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4119 if (use_vwgt) { 4120 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4121 v_wgt[0] = local_size; 4122 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4123 } 4124 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4125 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4126 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4127 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4128 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4129 4130 /* renumber new_ranks to avoid "holes" in new set of processors */ 4131 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4132 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4133 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4134 if (!redprocs) { 4135 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4136 } else { 4137 PetscInt idxs[1]; 4138 PetscMPIInt tag; 4139 MPI_Request *reqs; 4140 4141 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4142 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4143 for (i=rstart;i<rend;i++) { 4144 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4145 } 4146 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4147 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4148 ierr = PetscFree(reqs);CHKERRQ(ierr); 4149 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4150 } 4151 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4152 /* clean up */ 4153 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4154 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4155 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4156 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4157 } 4158 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4159 4160 /* assemble parallel IS for sends */ 4161 i = 1; 4162 if (color) i=0; 4163 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4164 /* get back IS */ 4165 *is_sends = ranks_send_to; 4166 PetscFunctionReturn(0); 4167 } 4168 4169 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4170 4171 #undef __FUNCT__ 4172 #define __FUNCT__ "MatISSubassemble" 4173 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4174 { 4175 Mat local_mat; 4176 IS is_sends_internal; 4177 PetscInt rows,cols,new_local_rows; 4178 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4179 PetscBool ismatis,isdense,newisdense,destroy_mat; 4180 ISLocalToGlobalMapping l2gmap; 4181 PetscInt* l2gmap_indices; 4182 const PetscInt* is_indices; 4183 MatType new_local_type; 4184 /* buffers */ 4185 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4186 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4187 PetscInt *recv_buffer_idxs_local; 4188 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4189 /* MPI */ 4190 MPI_Comm comm,comm_n; 4191 PetscSubcomm subcomm; 4192 PetscMPIInt n_sends,n_recvs,commsize; 4193 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4194 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4195 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4196 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4197 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4198 PetscErrorCode ierr; 4199 4200 PetscFunctionBegin; 4201 /* TODO: add missing checks */ 4202 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4203 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4204 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4205 PetscValidLogicalCollectiveInt(mat,nis,7); 4206 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4207 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4208 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4209 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4210 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4211 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4212 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4213 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4214 PetscInt mrows,mcols,mnrows,mncols; 4215 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4216 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4217 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4218 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4219 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4220 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4221 } 4222 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4223 PetscValidLogicalCollectiveInt(mat,bs,0); 4224 /* prepare IS for sending if not provided */ 4225 if (!is_sends) { 4226 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4227 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4228 } else { 4229 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4230 is_sends_internal = is_sends; 4231 } 4232 4233 /* get comm */ 4234 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4235 4236 /* compute number of sends */ 4237 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4238 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4239 4240 /* compute number of receives */ 4241 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4242 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4243 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4244 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4245 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4246 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4247 ierr = PetscFree(iflags);CHKERRQ(ierr); 4248 4249 /* restrict comm if requested */ 4250 subcomm = 0; 4251 destroy_mat = PETSC_FALSE; 4252 if (restrict_comm) { 4253 PetscMPIInt color,subcommsize; 4254 4255 color = 0; 4256 if (restrict_full) { 4257 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4258 } else { 4259 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4260 } 4261 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4262 subcommsize = commsize - subcommsize; 4263 /* check if reuse has been requested */ 4264 if (reuse == MAT_REUSE_MATRIX) { 4265 if (*mat_n) { 4266 PetscMPIInt subcommsize2; 4267 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4268 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4269 comm_n = PetscObjectComm((PetscObject)*mat_n); 4270 } else { 4271 comm_n = PETSC_COMM_SELF; 4272 } 4273 } else { /* MAT_INITIAL_MATRIX */ 4274 PetscMPIInt rank; 4275 4276 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4277 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4278 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4279 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4280 comm_n = PetscSubcommChild(subcomm); 4281 } 4282 /* flag to destroy *mat_n if not significative */ 4283 if (color) destroy_mat = PETSC_TRUE; 4284 } else { 4285 comm_n = comm; 4286 } 4287 4288 /* prepare send/receive buffers */ 4289 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4290 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4291 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4292 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4293 if (nis) { 4294 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4295 } 4296 4297 /* Get data from local matrices */ 4298 if (!isdense) { 4299 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4300 /* TODO: See below some guidelines on how to prepare the local buffers */ 4301 /* 4302 send_buffer_vals should contain the raw values of the local matrix 4303 send_buffer_idxs should contain: 4304 - MatType_PRIVATE type 4305 - PetscInt size_of_l2gmap 4306 - PetscInt global_row_indices[size_of_l2gmap] 4307 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4308 */ 4309 } else { 4310 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4311 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4312 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4313 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4314 send_buffer_idxs[1] = i; 4315 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4316 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4317 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4318 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4319 for (i=0;i<n_sends;i++) { 4320 ilengths_vals[is_indices[i]] = len*len; 4321 ilengths_idxs[is_indices[i]] = len+2; 4322 } 4323 } 4324 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4325 /* additional is (if any) */ 4326 if (nis) { 4327 PetscMPIInt psum; 4328 PetscInt j; 4329 for (j=0,psum=0;j<nis;j++) { 4330 PetscInt plen; 4331 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4332 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4333 psum += len+1; /* indices + lenght */ 4334 } 4335 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4336 for (j=0,psum=0;j<nis;j++) { 4337 PetscInt plen; 4338 const PetscInt *is_array_idxs; 4339 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4340 send_buffer_idxs_is[psum] = plen; 4341 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4342 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4343 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4344 psum += plen+1; /* indices + lenght */ 4345 } 4346 for (i=0;i<n_sends;i++) { 4347 ilengths_idxs_is[is_indices[i]] = psum; 4348 } 4349 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4350 } 4351 4352 buf_size_idxs = 0; 4353 buf_size_vals = 0; 4354 buf_size_idxs_is = 0; 4355 for (i=0;i<n_recvs;i++) { 4356 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4357 buf_size_vals += (PetscInt)olengths_vals[i]; 4358 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4359 } 4360 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4361 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4362 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4363 4364 /* get new tags for clean communications */ 4365 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4366 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4367 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4368 4369 /* allocate for requests */ 4370 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4371 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4372 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4373 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4374 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4375 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4376 4377 /* communications */ 4378 ptr_idxs = recv_buffer_idxs; 4379 ptr_vals = recv_buffer_vals; 4380 ptr_idxs_is = recv_buffer_idxs_is; 4381 for (i=0;i<n_recvs;i++) { 4382 source_dest = onodes[i]; 4383 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4384 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4385 ptr_idxs += olengths_idxs[i]; 4386 ptr_vals += olengths_vals[i]; 4387 if (nis) { 4388 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4389 ptr_idxs_is += olengths_idxs_is[i]; 4390 } 4391 } 4392 for (i=0;i<n_sends;i++) { 4393 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4394 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4395 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4396 if (nis) { 4397 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4398 } 4399 } 4400 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4401 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4402 4403 /* assemble new l2g map */ 4404 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4405 ptr_idxs = recv_buffer_idxs; 4406 new_local_rows = 0; 4407 for (i=0;i<n_recvs;i++) { 4408 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4409 ptr_idxs += olengths_idxs[i]; 4410 } 4411 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4412 ptr_idxs = recv_buffer_idxs; 4413 new_local_rows = 0; 4414 for (i=0;i<n_recvs;i++) { 4415 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4416 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4417 ptr_idxs += olengths_idxs[i]; 4418 } 4419 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4420 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4421 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4422 4423 /* infer new local matrix type from received local matrices type */ 4424 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4425 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4426 if (n_recvs) { 4427 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4428 ptr_idxs = recv_buffer_idxs; 4429 for (i=0;i<n_recvs;i++) { 4430 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4431 new_local_type_private = MATAIJ_PRIVATE; 4432 break; 4433 } 4434 ptr_idxs += olengths_idxs[i]; 4435 } 4436 switch (new_local_type_private) { 4437 case MATDENSE_PRIVATE: 4438 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4439 new_local_type = MATSEQAIJ; 4440 bs = 1; 4441 } else { /* if I receive only 1 dense matrix */ 4442 new_local_type = MATSEQDENSE; 4443 bs = 1; 4444 } 4445 break; 4446 case MATAIJ_PRIVATE: 4447 new_local_type = MATSEQAIJ; 4448 bs = 1; 4449 break; 4450 case MATBAIJ_PRIVATE: 4451 new_local_type = MATSEQBAIJ; 4452 break; 4453 case MATSBAIJ_PRIVATE: 4454 new_local_type = MATSEQSBAIJ; 4455 break; 4456 default: 4457 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4458 break; 4459 } 4460 } else { /* by default, new_local_type is seqdense */ 4461 new_local_type = MATSEQDENSE; 4462 bs = 1; 4463 } 4464 4465 /* create MATIS object if needed */ 4466 if (reuse == MAT_INITIAL_MATRIX) { 4467 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4468 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4469 } else { 4470 /* it also destroys the local matrices */ 4471 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4472 } 4473 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4474 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4475 4476 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4477 4478 /* Global to local map of received indices */ 4479 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4480 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4481 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4482 4483 /* restore attributes -> type of incoming data and its size */ 4484 buf_size_idxs = 0; 4485 for (i=0;i<n_recvs;i++) { 4486 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4487 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4488 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4489 } 4490 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4491 4492 /* set preallocation */ 4493 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4494 if (!newisdense) { 4495 PetscInt *new_local_nnz=0; 4496 4497 ptr_vals = recv_buffer_vals; 4498 ptr_idxs = recv_buffer_idxs_local; 4499 if (n_recvs) { 4500 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4501 } 4502 for (i=0;i<n_recvs;i++) { 4503 PetscInt j; 4504 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4505 for (j=0;j<*(ptr_idxs+1);j++) { 4506 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4507 } 4508 } else { 4509 /* TODO */ 4510 } 4511 ptr_idxs += olengths_idxs[i]; 4512 } 4513 if (new_local_nnz) { 4514 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4515 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4516 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4517 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4518 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4519 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4520 } else { 4521 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4522 } 4523 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4524 } else { 4525 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4526 } 4527 4528 /* set values */ 4529 ptr_vals = recv_buffer_vals; 4530 ptr_idxs = recv_buffer_idxs_local; 4531 for (i=0;i<n_recvs;i++) { 4532 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4533 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4534 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4535 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4536 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4537 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4538 } else { 4539 /* TODO */ 4540 } 4541 ptr_idxs += olengths_idxs[i]; 4542 ptr_vals += olengths_vals[i]; 4543 } 4544 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4545 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4546 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4547 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4548 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4549 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4550 4551 #if 0 4552 if (!restrict_comm) { /* check */ 4553 Vec lvec,rvec; 4554 PetscReal infty_error; 4555 4556 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4557 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4558 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4559 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4560 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4561 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4562 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4563 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4564 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4565 } 4566 #endif 4567 4568 /* assemble new additional is (if any) */ 4569 if (nis) { 4570 PetscInt **temp_idxs,*count_is,j,psum; 4571 4572 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4573 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4574 ptr_idxs = recv_buffer_idxs_is; 4575 psum = 0; 4576 for (i=0;i<n_recvs;i++) { 4577 for (j=0;j<nis;j++) { 4578 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4579 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4580 psum += plen; 4581 ptr_idxs += plen+1; /* shift pointer to received data */ 4582 } 4583 } 4584 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4585 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4586 for (i=1;i<nis;i++) { 4587 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4588 } 4589 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4590 ptr_idxs = recv_buffer_idxs_is; 4591 for (i=0;i<n_recvs;i++) { 4592 for (j=0;j<nis;j++) { 4593 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4594 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4595 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4596 ptr_idxs += plen+1; /* shift pointer to received data */ 4597 } 4598 } 4599 for (i=0;i<nis;i++) { 4600 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4601 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4602 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4603 } 4604 ierr = PetscFree(count_is);CHKERRQ(ierr); 4605 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4606 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4607 } 4608 /* free workspace */ 4609 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4610 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4611 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4612 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4613 if (isdense) { 4614 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4615 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4616 } else { 4617 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4618 } 4619 if (nis) { 4620 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4621 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4622 } 4623 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4624 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4625 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4626 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4627 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4628 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4629 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4630 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4631 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4632 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4633 ierr = PetscFree(onodes);CHKERRQ(ierr); 4634 if (nis) { 4635 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4636 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4637 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4638 } 4639 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4640 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4641 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4642 for (i=0;i<nis;i++) { 4643 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4644 } 4645 *mat_n = NULL; 4646 } 4647 PetscFunctionReturn(0); 4648 } 4649 4650 /* temporary hack into ksp private data structure */ 4651 #include <petsc/private/kspimpl.h> 4652 4653 #undef __FUNCT__ 4654 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4655 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4656 { 4657 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4658 PC_IS *pcis = (PC_IS*)pc->data; 4659 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4660 MatNullSpace CoarseNullSpace=NULL; 4661 ISLocalToGlobalMapping coarse_islg; 4662 IS coarse_is,*isarray; 4663 PetscInt i,im_active=-1,active_procs=-1; 4664 PetscInt nis,nisdofs,nisneu; 4665 PC pc_temp; 4666 PCType coarse_pc_type; 4667 KSPType coarse_ksp_type; 4668 PetscBool multilevel_requested,multilevel_allowed; 4669 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4670 Mat t_coarse_mat_is; 4671 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4672 PetscMPIInt all_procs; 4673 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4674 PetscBool compute_vecs = PETSC_FALSE; 4675 PetscScalar *array; 4676 PetscErrorCode ierr; 4677 4678 PetscFunctionBegin; 4679 /* Assign global numbering to coarse dofs */ 4680 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4681 PetscInt ocoarse_size; 4682 compute_vecs = PETSC_TRUE; 4683 ocoarse_size = pcbddc->coarse_size; 4684 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4685 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4686 /* see if we can avoid some work */ 4687 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4688 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4689 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4690 PC pc; 4691 PetscBool isbddc; 4692 4693 /* temporary workaround since PCBDDC does not have a reset method so far */ 4694 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4695 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4696 if (isbddc) { 4697 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4698 } 4699 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4700 coarse_reuse = PETSC_FALSE; 4701 } else { /* we can safely reuse already computed coarse matrix */ 4702 coarse_reuse = PETSC_TRUE; 4703 } 4704 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4705 coarse_reuse = PETSC_FALSE; 4706 } 4707 /* reset any subassembling information */ 4708 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4709 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4710 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4711 coarse_reuse = PETSC_TRUE; 4712 } 4713 4714 /* count "active" (i.e. with positive local size) and "void" processes */ 4715 im_active = !!(pcis->n); 4716 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4717 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4718 void_procs = all_procs-active_procs; 4719 csin_type_simple = PETSC_TRUE; 4720 redist = PETSC_FALSE; 4721 if (pcbddc->current_level && void_procs) { 4722 csin_ml = PETSC_TRUE; 4723 ncoarse_ml = void_procs; 4724 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4725 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4726 csin_ds = PETSC_TRUE; 4727 ncoarse_ds = pcbddc->redistribute_coarse; 4728 redist = PETSC_TRUE; 4729 } else { 4730 csin_ds = PETSC_TRUE; 4731 ncoarse_ds = active_procs; 4732 redist = PETSC_TRUE; 4733 } 4734 } else { 4735 csin_ml = PETSC_FALSE; 4736 ncoarse_ml = all_procs; 4737 if (void_procs) { 4738 csin_ds = PETSC_TRUE; 4739 ncoarse_ds = void_procs; 4740 csin_type_simple = PETSC_FALSE; 4741 } else { 4742 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4743 csin_ds = PETSC_TRUE; 4744 ncoarse_ds = pcbddc->redistribute_coarse; 4745 redist = PETSC_TRUE; 4746 } else { 4747 csin_ds = PETSC_FALSE; 4748 ncoarse_ds = all_procs; 4749 } 4750 } 4751 } 4752 4753 /* 4754 test if we can go multilevel: three conditions must be satisfied: 4755 - we have not exceeded the number of levels requested 4756 - we can actually subassemble the active processes 4757 - we can find a suitable number of MPI processes where we can place the subassembled problem 4758 */ 4759 multilevel_allowed = PETSC_FALSE; 4760 multilevel_requested = PETSC_FALSE; 4761 if (pcbddc->current_level < pcbddc->max_levels) { 4762 multilevel_requested = PETSC_TRUE; 4763 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4764 multilevel_allowed = PETSC_FALSE; 4765 } else { 4766 multilevel_allowed = PETSC_TRUE; 4767 } 4768 } 4769 /* determine number of process partecipating to coarse solver */ 4770 if (multilevel_allowed) { 4771 ncoarse = ncoarse_ml; 4772 csin = csin_ml; 4773 redist = PETSC_FALSE; 4774 } else { 4775 ncoarse = ncoarse_ds; 4776 csin = csin_ds; 4777 } 4778 4779 /* creates temporary l2gmap and IS for coarse indexes */ 4780 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4781 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4782 4783 /* creates temporary MATIS object for coarse matrix */ 4784 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4785 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4786 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4787 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4788 #if 0 4789 { 4790 PetscViewer viewer; 4791 char filename[256]; 4792 sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4793 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4794 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4795 ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr); 4796 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4797 } 4798 #endif 4799 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4800 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4801 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4802 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4803 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4804 4805 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4806 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */ 4807 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4808 const PetscInt *idxs; 4809 ISLocalToGlobalMapping tmap; 4810 4811 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4812 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4813 /* allocate space for temporary storage */ 4814 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4815 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4816 /* allocate for IS array */ 4817 nisdofs = pcbddc->n_ISForDofsLocal; 4818 nisneu = !!pcbddc->NeumannBoundariesLocal; 4819 nis = nisdofs + nisneu; 4820 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4821 /* dofs splitting */ 4822 for (i=0;i<nisdofs;i++) { 4823 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4824 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4825 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4826 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4827 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4828 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4829 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4830 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4831 } 4832 /* neumann boundaries */ 4833 if (pcbddc->NeumannBoundariesLocal) { 4834 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4835 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4836 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4837 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4838 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4839 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4840 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4841 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4842 } 4843 /* free memory */ 4844 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4845 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4846 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4847 } else { 4848 nis = 0; 4849 nisdofs = 0; 4850 nisneu = 0; 4851 isarray = NULL; 4852 } 4853 /* destroy no longer needed map */ 4854 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4855 4856 /* restrict on coarse candidates (if needed) */ 4857 coarse_mat_is = NULL; 4858 if (csin) { 4859 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4860 if (redist) { 4861 PetscMPIInt rank; 4862 PetscInt spc,n_spc_p1,dest[1],destsize; 4863 4864 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4865 spc = active_procs/ncoarse; 4866 n_spc_p1 = active_procs%ncoarse; 4867 if (im_active) { 4868 destsize = 1; 4869 if (rank > n_spc_p1*(spc+1)-1) { 4870 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 4871 } else { 4872 dest[0] = rank/(spc+1); 4873 } 4874 } else { 4875 destsize = 0; 4876 } 4877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4878 } else if (csin_type_simple) { 4879 PetscMPIInt rank; 4880 PetscInt issize,isidx; 4881 4882 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4883 if (im_active) { 4884 issize = 1; 4885 isidx = (PetscInt)rank; 4886 } else { 4887 issize = 0; 4888 isidx = -1; 4889 } 4890 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4891 } else { /* get a suitable subassembling pattern from MATIS code */ 4892 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4893 } 4894 4895 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 4896 if (!redist || ncoarse <= void_procs) { 4897 PetscInt ncoarse_cand,tissize,*nisindices; 4898 PetscInt *coarse_candidates; 4899 const PetscInt* tisindices; 4900 4901 /* get coarse candidates' ranks in pc communicator */ 4902 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 4903 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4904 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 4905 if (!coarse_candidates[i]) { 4906 coarse_candidates[ncoarse_cand++]=i; 4907 } 4908 } 4909 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 4910 4911 4912 if (pcbddc->dbg_flag) { 4913 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4914 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 4915 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4916 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 4917 for (i=0;i<ncoarse_cand;i++) { 4918 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 4919 } 4920 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 4921 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4922 } 4923 /* shift the pattern on coarse candidates */ 4924 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 4925 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4926 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 4927 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 4928 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 4929 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 4930 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 4931 } 4932 if (pcbddc->dbg_flag) { 4933 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4934 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 4935 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 4936 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4937 } 4938 } 4939 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 4940 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 4941 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4942 } else { /* this is the last level, so use just receiving processes in subcomm */ 4943 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 4944 } 4945 } else { 4946 if (pcbddc->dbg_flag) { 4947 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4948 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 4949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4950 } 4951 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 4952 coarse_mat_is = t_coarse_mat_is; 4953 } 4954 4955 /* create local to global scatters for coarse problem */ 4956 if (compute_vecs) { 4957 PetscInt lrows; 4958 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 4959 if (coarse_mat_is) { 4960 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 4961 } else { 4962 lrows = 0; 4963 } 4964 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 4965 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 4966 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 4967 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4968 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 4969 } 4970 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 4971 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 4972 4973 /* set defaults for coarse KSP and PC */ 4974 if (multilevel_allowed) { 4975 coarse_ksp_type = KSPRICHARDSON; 4976 coarse_pc_type = PCBDDC; 4977 } else { 4978 coarse_ksp_type = KSPPREONLY; 4979 coarse_pc_type = PCREDUNDANT; 4980 } 4981 4982 /* print some info if requested */ 4983 if (pcbddc->dbg_flag) { 4984 if (!multilevel_allowed) { 4985 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4986 if (multilevel_requested) { 4987 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 4988 } else if (pcbddc->max_levels) { 4989 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 4990 } 4991 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4992 } 4993 } 4994 4995 /* create the coarse KSP object only once with defaults */ 4996 if (coarse_mat_is) { 4997 MatReuse coarse_mat_reuse; 4998 PetscViewer dbg_viewer = NULL; 4999 if (pcbddc->dbg_flag) { 5000 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5001 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5002 } 5003 if (!pcbddc->coarse_ksp) { 5004 char prefix[256],str_level[16]; 5005 size_t len; 5006 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5007 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5008 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5009 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5010 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5011 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5012 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5013 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5014 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5015 /* prefix */ 5016 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5017 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5018 if (!pcbddc->current_level) { 5019 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5020 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5021 } else { 5022 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5023 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5024 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5025 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5026 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5027 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5028 } 5029 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5030 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5031 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5032 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5033 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5034 /* allow user customization */ 5035 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5036 } 5037 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5038 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5039 if (nisdofs) { 5040 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5041 for (i=0;i<nisdofs;i++) { 5042 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5043 } 5044 } 5045 if (nisneu) { 5046 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5047 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5048 } 5049 5050 /* get some info after set from options */ 5051 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5052 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5053 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5054 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5055 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5056 isbddc = PETSC_FALSE; 5057 } 5058 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5059 if (isredundant) { 5060 KSP inner_ksp; 5061 PC inner_pc; 5062 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5063 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5064 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5065 } 5066 5067 /* assemble coarse matrix */ 5068 if (coarse_reuse) { 5069 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5070 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5071 coarse_mat_reuse = MAT_REUSE_MATRIX; 5072 } else { 5073 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5074 } 5075 if (isbddc || isnn) { 5076 if (pcbddc->coarsening_ratio > 1) { 5077 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5078 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5079 if (pcbddc->dbg_flag) { 5080 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5081 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5082 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5083 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5084 } 5085 } 5086 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5087 } else { 5088 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5089 coarse_mat = coarse_mat_is; 5090 } 5091 } else { 5092 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5093 } 5094 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5095 5096 /* propagate symmetry info of coarse matrix */ 5097 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5098 if (pc->pmat->symmetric_set) { 5099 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5100 } 5101 if (pc->pmat->hermitian_set) { 5102 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5103 } 5104 if (pc->pmat->spd_set) { 5105 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5106 } 5107 /* set operators */ 5108 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5109 if (pcbddc->dbg_flag) { 5110 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5111 } 5112 } else { /* processes non partecipating to coarse solver (if any) */ 5113 coarse_mat = 0; 5114 } 5115 ierr = PetscFree(isarray);CHKERRQ(ierr); 5116 #if 0 5117 { 5118 PetscViewer viewer; 5119 char filename[256]; 5120 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5121 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5122 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5123 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5124 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5125 } 5126 #endif 5127 5128 /* Compute coarse null space (special handling by BDDC only) */ 5129 #if 0 5130 if (pcbddc->NullSpace) { 5131 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5132 } 5133 #endif 5134 /* hack */ 5135 if (pcbddc->coarse_ksp) { 5136 Vec crhs,csol; 5137 5138 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5139 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5140 if (!csol) { 5141 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5142 } 5143 if (!crhs) { 5144 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5145 } 5146 } 5147 5148 /* compute null space for coarse solver if the benign trick has been requested */ 5149 if (pcbddc->benign_null) { 5150 5151 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5152 for (i=0;i<pcbddc->benign_n;i++) { 5153 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5154 } 5155 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5156 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5157 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5158 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5159 if (coarse_mat) { 5160 Vec nullv; 5161 PetscScalar *array,*array2; 5162 PetscInt nl; 5163 5164 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5165 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5166 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5167 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5168 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5169 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5170 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5171 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5172 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5173 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5174 } 5175 } 5176 5177 if (pcbddc->coarse_ksp) { 5178 PetscBool ispreonly; 5179 5180 if (CoarseNullSpace) { 5181 PetscBool isnull; 5182 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5183 if (isnull) { 5184 if (isbddc) { 5185 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5186 } else { 5187 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5188 } 5189 } else { 5190 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5191 } 5192 } 5193 /* setup coarse ksp */ 5194 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5195 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5196 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5197 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5198 KSP check_ksp; 5199 KSPType check_ksp_type; 5200 PC check_pc; 5201 Vec check_vec,coarse_vec; 5202 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5203 PetscInt its; 5204 PetscBool compute_eigs; 5205 PetscReal *eigs_r,*eigs_c; 5206 PetscInt neigs; 5207 const char *prefix; 5208 5209 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5210 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5211 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5212 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5213 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5214 if (ispreonly) { 5215 check_ksp_type = KSPPREONLY; 5216 compute_eigs = PETSC_FALSE; 5217 } else { 5218 check_ksp_type = KSPGMRES; 5219 compute_eigs = PETSC_TRUE; 5220 } 5221 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5222 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5223 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5224 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5225 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5226 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5227 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5228 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5229 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5230 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5231 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5232 /* create random vec */ 5233 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5234 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5235 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5236 if (CoarseNullSpace) { 5237 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5238 } 5239 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5240 /* solve coarse problem */ 5241 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5242 if (CoarseNullSpace) { 5243 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5244 } 5245 /* set eigenvalue estimation if preonly has not been requested */ 5246 if (compute_eigs) { 5247 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5248 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5249 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5250 lambda_max = eigs_r[neigs-1]; 5251 lambda_min = eigs_r[0]; 5252 if (pcbddc->use_coarse_estimates) { 5253 if (lambda_max>lambda_min) { 5254 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5255 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5256 } 5257 } 5258 } 5259 5260 /* check coarse problem residual error */ 5261 if (pcbddc->dbg_flag) { 5262 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5263 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5264 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5265 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5266 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5267 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5268 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5269 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5270 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5271 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5272 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5273 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5274 if (CoarseNullSpace) { 5275 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5276 } 5277 if (compute_eigs) { 5278 PetscReal lambda_max_s,lambda_min_s; 5279 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5280 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5281 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5282 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5283 for (i=0;i<neigs;i++) { 5284 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5285 } 5286 } 5287 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5288 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5289 } 5290 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5291 if (compute_eigs) { 5292 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5293 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5294 } 5295 } 5296 } 5297 /* print additional info */ 5298 if (pcbddc->dbg_flag) { 5299 /* waits until all processes reaches this point */ 5300 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5301 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5302 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5303 } 5304 5305 /* free memory */ 5306 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5307 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5308 PetscFunctionReturn(0); 5309 } 5310 5311 #undef __FUNCT__ 5312 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5313 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5314 { 5315 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5316 PC_IS* pcis = (PC_IS*)pc->data; 5317 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5318 IS subset,subset_mult,subset_n; 5319 PetscInt local_size,coarse_size=0; 5320 PetscInt *local_primal_indices=NULL; 5321 const PetscInt *t_local_primal_indices; 5322 PetscErrorCode ierr; 5323 5324 PetscFunctionBegin; 5325 /* Compute global number of coarse dofs */ 5326 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5327 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5328 } 5329 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5330 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5331 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5332 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5333 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5334 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5335 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5336 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5337 if (local_size != pcbddc->local_primal_size) { 5338 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5339 } 5340 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5341 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5342 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5343 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5344 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5345 5346 /* check numbering */ 5347 if (pcbddc->dbg_flag) { 5348 PetscScalar coarsesum,*array,*array2; 5349 PetscInt i; 5350 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5351 5352 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5353 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5354 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5355 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5356 /* counter */ 5357 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5358 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5359 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5360 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5361 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5362 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5363 5364 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5365 for (i=0;i<pcbddc->local_primal_size;i++) { 5366 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5367 } 5368 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5369 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5370 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5371 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5372 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5373 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5374 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5375 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5376 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5377 for (i=0;i<pcis->n;i++) { 5378 if (array[i] != 0.0 && array[i] != array2[i]) { 5379 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5380 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5381 set_error = PETSC_TRUE; 5382 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5383 } 5384 } 5385 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5386 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5388 for (i=0;i<pcis->n;i++) { 5389 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5390 } 5391 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5392 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5393 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5394 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5395 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5397 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5398 PetscInt *gidxs; 5399 5400 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5401 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5402 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5403 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5405 for (i=0;i<pcbddc->local_primal_size;i++) { 5406 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5407 } 5408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5409 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5410 } 5411 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5412 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5413 } 5414 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5415 /* get back data */ 5416 *coarse_size_n = coarse_size; 5417 *local_primal_indices_n = local_primal_indices; 5418 PetscFunctionReturn(0); 5419 } 5420 5421 #undef __FUNCT__ 5422 #define __FUNCT__ "PCBDDCGlobalToLocal" 5423 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5424 { 5425 IS localis_t; 5426 PetscInt i,lsize,*idxs,n; 5427 PetscScalar *vals; 5428 PetscErrorCode ierr; 5429 5430 PetscFunctionBegin; 5431 /* get indices in local ordering exploiting local to global map */ 5432 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5433 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5434 for (i=0;i<lsize;i++) vals[i] = 1.0; 5435 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5436 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5437 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5438 if (idxs) { /* multilevel guard */ 5439 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5440 } 5441 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5442 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5443 ierr = PetscFree(vals);CHKERRQ(ierr); 5444 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5445 /* now compute set in local ordering */ 5446 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5447 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5448 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5449 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5450 for (i=0,lsize=0;i<n;i++) { 5451 if (PetscRealPart(vals[i]) > 0.5) { 5452 lsize++; 5453 } 5454 } 5455 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5456 for (i=0,lsize=0;i<n;i++) { 5457 if (PetscRealPart(vals[i]) > 0.5) { 5458 idxs[lsize++] = i; 5459 } 5460 } 5461 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5462 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5463 *localis = localis_t; 5464 PetscFunctionReturn(0); 5465 } 5466 5467 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5468 #undef __FUNCT__ 5469 #define __FUNCT__ "PCBDDCMatMult_Private" 5470 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5471 { 5472 PCBDDCChange_ctx change_ctx; 5473 PetscErrorCode ierr; 5474 5475 PetscFunctionBegin; 5476 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5477 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5478 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5479 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5480 PetscFunctionReturn(0); 5481 } 5482 5483 #undef __FUNCT__ 5484 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5485 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5486 { 5487 PCBDDCChange_ctx change_ctx; 5488 PetscErrorCode ierr; 5489 5490 PetscFunctionBegin; 5491 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5492 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5493 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5494 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5495 PetscFunctionReturn(0); 5496 } 5497 5498 #undef __FUNCT__ 5499 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5500 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5501 { 5502 PC_IS *pcis=(PC_IS*)pc->data; 5503 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5504 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5505 Mat S_j; 5506 PetscInt *used_xadj,*used_adjncy; 5507 PetscBool free_used_adj; 5508 PetscErrorCode ierr; 5509 5510 PetscFunctionBegin; 5511 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5512 free_used_adj = PETSC_FALSE; 5513 if (pcbddc->sub_schurs_layers == -1) { 5514 used_xadj = NULL; 5515 used_adjncy = NULL; 5516 } else { 5517 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5518 used_xadj = pcbddc->mat_graph->xadj; 5519 used_adjncy = pcbddc->mat_graph->adjncy; 5520 } else if (pcbddc->computed_rowadj) { 5521 used_xadj = pcbddc->mat_graph->xadj; 5522 used_adjncy = pcbddc->mat_graph->adjncy; 5523 } else { 5524 PetscBool flg_row=PETSC_FALSE; 5525 const PetscInt *xadj,*adjncy; 5526 PetscInt nvtxs; 5527 5528 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5529 if (flg_row) { 5530 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5531 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5532 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5533 free_used_adj = PETSC_TRUE; 5534 } else { 5535 pcbddc->sub_schurs_layers = -1; 5536 used_xadj = NULL; 5537 used_adjncy = NULL; 5538 } 5539 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5540 } 5541 } 5542 5543 /* setup sub_schurs data */ 5544 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5545 if (!sub_schurs->use_mumps) { 5546 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5547 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5548 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5549 } else { 5550 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5551 PetscBool isseqaij; 5552 if (!pcbddc->use_vertices && reuse_solvers) { 5553 PetscInt n_vertices; 5554 5555 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5556 reuse_solvers = (PetscBool)!n_vertices; 5557 } 5558 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5559 if (!isseqaij) { 5560 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5561 if (matis->A == pcbddc->local_mat) { 5562 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5563 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5564 } else { 5565 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5566 } 5567 } 5568 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5569 } 5570 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5571 5572 /* free adjacency */ 5573 if (free_used_adj) { 5574 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5575 } 5576 PetscFunctionReturn(0); 5577 } 5578 5579 #undef __FUNCT__ 5580 #define __FUNCT__ "PCBDDCInitSubSchurs" 5581 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5582 { 5583 PC_IS *pcis=(PC_IS*)pc->data; 5584 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5585 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5586 PCBDDCGraph graph; 5587 PetscErrorCode ierr; 5588 5589 PetscFunctionBegin; 5590 /* attach interface graph for determining subsets */ 5591 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5592 IS verticesIS,verticescomm; 5593 PetscInt vsize,*idxs; 5594 5595 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5596 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5597 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5598 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5599 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5600 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5601 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5602 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5603 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5604 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5605 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5606 /* 5607 if (pcbddc->dbg_flag) { 5608 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5609 } 5610 */ 5611 } else { 5612 graph = pcbddc->mat_graph; 5613 } 5614 5615 /* sub_schurs init */ 5616 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5617 5618 /* free graph struct */ 5619 if (pcbddc->sub_schurs_rebuild) { 5620 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5621 } 5622 PetscFunctionReturn(0); 5623 } 5624 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "PCBDDCCheckOperator" 5627 PetscErrorCode PCBDDCCheckOperator(PC pc) 5628 { 5629 PC_IS *pcis=(PC_IS*)pc->data; 5630 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5631 PetscErrorCode ierr; 5632 5633 PetscFunctionBegin; 5634 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5635 IS zerodiag = NULL; 5636 Mat S_j,B0_B=NULL; 5637 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5638 PetscScalar *p0_check,*array,*array2; 5639 PetscReal norm; 5640 PetscInt i; 5641 5642 /* B0 and B0_B */ 5643 if (zerodiag) { 5644 IS dummy; 5645 5646 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5647 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5648 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5649 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5650 } 5651 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5652 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5653 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5654 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5655 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5656 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5657 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5658 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5659 /* S_j */ 5660 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5661 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5662 5663 /* mimic vector in \widetilde{W}_\Gamma */ 5664 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5665 /* continuous in primal space */ 5666 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5667 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5668 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5669 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5670 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5671 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5672 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5673 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5674 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5675 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5676 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5677 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5678 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5679 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5680 5681 /* assemble rhs for coarse problem */ 5682 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5683 /* local with Schur */ 5684 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5685 if (zerodiag) { 5686 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5687 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5688 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5689 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5690 } 5691 /* sum on primal nodes the local contributions */ 5692 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5693 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5694 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5695 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5696 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5697 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5698 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5699 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5700 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5701 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5702 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5703 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5704 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5705 /* scale primal nodes (BDDC sums contibutions) */ 5706 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5707 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5708 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5709 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5710 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5711 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5712 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5713 /* global: \widetilde{B0}_B w_\Gamma */ 5714 if (zerodiag) { 5715 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5716 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5717 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5718 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5719 } 5720 /* BDDC */ 5721 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5722 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5723 5724 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5725 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5726 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5727 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5728 for (i=0;i<pcbddc->benign_n;i++) { 5729 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5730 } 5731 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5732 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5733 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5734 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5735 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5736 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5737 } 5738 PetscFunctionReturn(0); 5739 } 5740