1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 147 /* partial clean up */ 148 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 149 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 150 if (A != B) { 151 ierr = MatDestroy(&B);CHKERRQ(ierr); 152 } 153 154 /* get back data */ 155 if (ncc) *ncc = graph->ncc; 156 if (cc) { 157 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 158 for (i=0;i<graph->ncc;i++) { 159 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 160 } 161 *cc = cc_n; 162 } 163 /* clean up graph */ 164 graph->xadj = 0; 165 graph->adjncy = 0; 166 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "PCBDDCBenignCheck" 172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 173 { 174 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 175 PC_IS* pcis = (PC_IS*)(pc->data); 176 IS dirIS = NULL; 177 PetscInt i; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 182 if (zerodiag) { 183 Mat A; 184 Vec vec3_N; 185 PetscScalar *vals; 186 const PetscInt *idxs; 187 PetscInt nz; 188 189 /* p0 */ 190 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 191 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 192 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 193 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 194 for (i=0;i<nz;i++) vals[i] = 1.; 195 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 196 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 197 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 198 /* v_I */ 199 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 200 for (i=0;i<nz;i++) vals[i] = 0.; 201 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 202 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 203 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 204 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 205 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 206 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 207 if (dirIS) { 208 PetscInt n; 209 210 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 211 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 212 for (i=0;i<n;i++) vals[i] = 0.; 213 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 214 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 215 } 216 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 218 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 219 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 220 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 221 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 222 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 223 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 224 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 225 } 226 ierr = PetscFree(vals);CHKERRQ(ierr); 227 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 228 } 229 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 230 231 /* check PCBDDCBenignGetOrSetP0 */ 232 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 233 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 234 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 235 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 236 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 237 for (i=0;i<pcbddc->benign_n;i++) { 238 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 239 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 240 } 241 } 242 PetscFunctionReturn(0); 243 } 244 245 #undef __FUNCT__ 246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 248 { 249 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 250 IS pressures,zerodiag,*zerodiag_subs; 251 PetscInt nz,n; 252 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 258 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 259 ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr); 260 pcbddc->benign_original_mat = pcbddc->local_mat; 261 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 262 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 263 Checks if all the pressure dofs in each subdomain have a zero diagonal 264 If not, a change of basis on pressures is not needed 265 since the local Schur complements are SPD 266 */ 267 has_null_pressures = PETSC_TRUE; 268 have_null = PETSC_TRUE; 269 if (pcbddc->n_ISForDofsLocal) { 270 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 271 272 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 273 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 274 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 275 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 276 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 277 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 278 if (!sorted) { 279 ierr = ISSort(pressures);CHKERRQ(ierr); 280 } 281 } else { 282 pressures = NULL; 283 } 284 ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr); 285 /* TODO: add check for shared dofs and raise error */ 286 ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr); 287 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 288 if (!sorted) { 289 ierr = ISSort(zerodiag);CHKERRQ(ierr); 290 } 291 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 292 if (!nz) { 293 if (n) have_null = PETSC_FALSE; 294 has_null_pressures = PETSC_FALSE; 295 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 296 } 297 recompute_zerodiag = PETSC_FALSE; 298 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 299 zerodiag_subs = NULL; 300 pcbddc->benign_n = 0; 301 if (has_null_pressures) { 302 IS *subs; 303 PetscInt nsubs,i; 304 305 subs = pcbddc->local_subs; 306 nsubs = pcbddc->n_local_subs; 307 if (nsubs > 1) { 308 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 309 for (i=0;i<nsubs;i++) { 310 ISLocalToGlobalMapping l2g; 311 IS t_zerodiag_subs; 312 PetscInt nl; 313 314 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 315 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 316 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 317 if (nl) { 318 PetscBool valid = PETSC_TRUE; 319 320 if (pressures) { 321 IS t_pressure_subs; 322 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 323 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 324 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 325 } 326 if (valid) { 327 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 328 pcbddc->benign_n++; 329 } else { 330 recompute_zerodiag = PETSC_TRUE; 331 } 332 } 333 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 334 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 335 } 336 } else { /* there's just one subdomain (or zero if they have not been detected */ 337 PetscBool valid = PETSC_TRUE; 338 if (pressures) { 339 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 340 } 341 if (valid) { 342 pcbddc->benign_n = 1; 343 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 344 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 345 zerodiag_subs[0] = zerodiag; 346 } 347 } 348 } 349 350 if (!pcbddc->benign_n) { 351 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 352 recompute_zerodiag = PETSC_FALSE; 353 has_null_pressures = PETSC_FALSE; 354 have_null = PETSC_FALSE; 355 } 356 357 /* final check for null pressures */ 358 if (zerodiag && pressures) { 359 PetscInt nz,np; 360 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 361 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 362 if (nz != np) have_null = PETSC_FALSE; 363 } 364 365 if (recompute_zerodiag) { 366 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 367 if (pcbddc->benign_n == 1) { 368 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 369 zerodiag = zerodiag_subs[0]; 370 } else { 371 PetscInt i,nzn,*new_idxs; 372 373 nzn = 0; 374 for (i=0;i<pcbddc->benign_n;i++) { 375 PetscInt ns; 376 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 377 nzn += ns; 378 } 379 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 380 nzn = 0; 381 for (i=0;i<pcbddc->benign_n;i++) { 382 PetscInt ns,*idxs; 383 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 384 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 386 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 387 nzn += ns; 388 } 389 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 390 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 391 } 392 have_null = PETSC_FALSE; 393 } 394 395 if (has_null_pressures) { 396 IS zerodiagc; 397 const PetscInt *idxs,*idxsc; 398 PetscInt i,s,*nnz; 399 Mat M; 400 401 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 402 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 403 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 404 /* local change of basis for pressures */ 405 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 406 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr); 407 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 408 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 409 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 410 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 411 for (i=0;i<pcbddc->benign_n;i++) { 412 PetscInt nzs,j; 413 414 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 415 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 417 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 418 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 419 } 420 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 421 ierr = PetscFree(nnz);CHKERRQ(ierr); 422 /* set identity on velocities */ 423 for (i=0;i<n-nz;i++) { 424 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 425 } 426 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 427 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 428 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 429 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 430 /* set change on pressures */ 431 for (s=0;s<pcbddc->benign_n;s++) { 432 PetscScalar *array; 433 PetscInt nzs; 434 435 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 436 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 437 for (i=0;i<nzs-1;i++) { 438 PetscScalar vals[2]; 439 PetscInt cols[2]; 440 441 cols[0] = idxs[i]; 442 cols[1] = idxs[nzs-1]; 443 vals[0] = 1.; 444 vals[1] = 1.; 445 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 446 } 447 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 448 for (i=0;i<nzs-1;i++) array[i] = -1.; 449 array[nzs-1] = 1.; 450 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 451 /* store local idxs for p0 */ 452 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 453 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 454 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 455 ierr = PetscFree(array);CHKERRQ(ierr); 456 } 457 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 458 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 459 /* TODO: need optimization? */ 460 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 461 ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 462 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 463 ierr = MatDestroy(&M);CHKERRQ(ierr); 464 /* store global idxs for p0 */ 465 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 466 } 467 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 468 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 469 470 /* determines if the coarse solver will be singular or not */ 471 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 472 *zerodiaglocal = zerodiag; 473 PetscFunctionReturn(0); 474 } 475 476 #undef __FUNCT__ 477 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 478 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 479 { 480 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 if (!pcbddc->benign_sf) { 485 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 486 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 487 } 488 if (get) { /* use SF to get values */ 489 PetscScalar *array; 490 491 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 493 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 494 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 495 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 496 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 497 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 498 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 499 } 500 PetscFunctionReturn(0); 501 } 502 503 #undef __FUNCT__ 504 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 505 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 506 { 507 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 508 PetscErrorCode ierr; 509 510 PetscFunctionBegin; 511 /* TODO: add error checking 512 - avoid nested pop (or push) calls. 513 - cannot push before pop. 514 - cannot call this if pcbddc->local_mat is NULL 515 */ 516 if (!pcbddc->benign_n) { 517 PetscFunctionReturn(0); 518 } 519 if (pop) { 520 IS is_p0; 521 MatReuse reuse; 522 523 /* extract B_0 */ 524 reuse = MAT_INITIAL_MATRIX; 525 if (pcbddc->benign_B0) { 526 reuse = MAT_REUSE_MATRIX; 527 } 528 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 529 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 530 /* remove rows and cols from local problem */ 531 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 532 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 533 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 534 } else { /* push */ 535 PetscInt i; 536 537 for (i=0;i<pcbddc->benign_n;i++) { 538 PetscScalar *B0_vals; 539 PetscInt *B0_cols,B0_ncol; 540 541 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 542 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 543 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 544 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 545 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 546 } 547 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 549 } 550 PetscFunctionReturn(0); 551 } 552 553 #undef __FUNCT__ 554 #define __FUNCT__ "PCBDDCAdaptiveSelection" 555 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 556 { 557 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 558 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 559 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 560 PetscBLASInt *B_iwork,*B_ifail; 561 PetscScalar *work,lwork; 562 PetscScalar *St,*S,*eigv; 563 PetscScalar *Sarray,*Starray; 564 PetscReal *eigs,thresh; 565 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 566 PetscBool allocated_S_St; 567 #if defined(PETSC_USE_COMPLEX) 568 PetscReal *rwork; 569 #endif 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 if (!sub_schurs->use_mumps) { 574 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 575 } 576 577 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 578 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 579 } 580 581 if (pcbddc->dbg_flag) { 582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 584 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 585 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 590 } 591 592 /* max size of subsets */ 593 mss = 0; 594 for (i=0;i<sub_schurs->n_subs;i++) { 595 PetscInt subset_size; 596 597 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 598 mss = PetscMax(mss,subset_size); 599 } 600 601 /* min/max and threshold */ 602 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 603 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 604 nmax = PetscMax(nmin,nmax); 605 allocated_S_St = PETSC_FALSE; 606 if (nmin) { 607 allocated_S_St = PETSC_TRUE; 608 } 609 610 /* allocate lapack workspace */ 611 cum = cum2 = 0; 612 maxneigs = 0; 613 for (i=0;i<sub_schurs->n_subs;i++) { 614 PetscInt n,subset_size; 615 616 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 617 n = PetscMin(subset_size,nmax); 618 cum += subset_size; 619 cum2 += subset_size*n; 620 maxneigs = PetscMax(maxneigs,n); 621 } 622 if (mss) { 623 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 624 PetscBLASInt B_itype = 1; 625 PetscBLASInt B_N = mss; 626 PetscReal zero = 0.0; 627 PetscReal eps = 0.0; /* dlamch? */ 628 629 B_lwork = -1; 630 S = NULL; 631 St = NULL; 632 eigs = NULL; 633 eigv = NULL; 634 B_iwork = NULL; 635 B_ifail = NULL; 636 #if defined(PETSC_USE_COMPLEX) 637 rwork = NULL; 638 #endif 639 thresh = 1.0; 640 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 641 #if defined(PETSC_USE_COMPLEX) 642 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 643 #else 644 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 645 #endif 646 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 647 ierr = PetscFPTrapPop();CHKERRQ(ierr); 648 } else { 649 /* TODO */ 650 } 651 } else { 652 lwork = 0; 653 } 654 655 nv = 0; 656 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 657 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 658 } 659 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 660 if (allocated_S_St) { 661 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 662 } 663 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 664 #if defined(PETSC_USE_COMPLEX) 665 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 666 #endif 667 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 668 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 669 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 670 nv+cum,&pcbddc->adaptive_constraints_idxs, 671 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 672 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 673 674 maxneigs = 0; 675 cum = cum2 = cumarray = 0; 676 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 677 pcbddc->adaptive_constraints_data_ptr[0] = 0; 678 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 679 const PetscInt *idxs; 680 681 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 682 for (cum=0;cum<nv;cum++) { 683 pcbddc->adaptive_constraints_n[cum] = 1; 684 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 685 pcbddc->adaptive_constraints_data[cum] = 1.0; 686 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 687 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 688 } 689 cum2 = cum; 690 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 691 } 692 693 if (mss) { /* multilevel */ 694 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 695 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 696 } 697 698 thresh = pcbddc->adaptive_threshold; 699 for (i=0;i<sub_schurs->n_subs;i++) { 700 const PetscInt *idxs; 701 PetscReal infty = PETSC_MAX_REAL; 702 PetscInt j,subset_size,eigs_start = 0; 703 PetscBLASInt B_N; 704 PetscBool same_data = PETSC_FALSE; 705 706 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 707 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 708 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 709 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 710 if (sub_schurs->is_hermitian) { 711 PetscInt j,k; 712 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 713 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 714 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 715 } 716 for (j=0;j<subset_size;j++) { 717 for (k=j;k<subset_size;k++) { 718 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 719 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 720 } 721 } 722 } else { 723 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 724 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 725 } 726 } else { 727 S = Sarray + cumarray; 728 St = Starray + cumarray; 729 } 730 731 /* see if we can save some work */ 732 if (sub_schurs->n_subs == 1) { 733 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 734 } 735 736 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 737 B_neigs = 0; 738 } else { 739 if (nmin) { 740 Mat SM,StM; 741 PetscInt j,k,nccs,nccst; 742 743 for (j=0;j<subset_size;j++) { 744 for (k=j;k<subset_size;k++) { 745 S [k*subset_size+j] = S [j*subset_size+k]; 746 St[k*subset_size+j] = St[j*subset_size+k]; 747 } 748 } 749 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr); 750 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr); 751 ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr); 752 ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr); 753 if (nccs != 1 || nccst != 1) { 754 PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size); 755 } 756 ierr = MatDestroy(&SM);CHKERRQ(ierr); 757 ierr = MatDestroy(&StM);CHKERRQ(ierr); 758 } 759 760 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 761 PetscBLASInt B_itype = 1; 762 PetscBLASInt B_IL, B_IU; 763 PetscReal eps = -1.0; /* dlamch? */ 764 PetscInt nmin_s; 765 766 if (pcbddc->dbg_flag) { 767 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 768 } 769 770 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 771 if (thresh > 1.+PETSC_SMALL) { 772 773 /* ask for eigenvalues larger than thresh */ 774 #if defined(PETSC_USE_COMPLEX) 775 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 776 #else 777 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 778 #endif 779 } else { 780 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 781 B_IL = 1; 782 #if defined(PETSC_USE_COMPLEX) 783 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 784 #else 785 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 786 #endif 787 } 788 ierr = PetscFPTrapPop();CHKERRQ(ierr); 789 if (B_ierr) { 790 if (B_ierr < 0 ) { 791 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 792 } else if (B_ierr <= B_N) { 793 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 794 } else { 795 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 796 } 797 } 798 799 if (B_neigs > nmax) { 800 if (pcbddc->dbg_flag) { 801 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 802 } 803 eigs_start = B_neigs -nmax; 804 B_neigs = nmax; 805 } 806 807 nmin_s = PetscMin(nmin,B_N); 808 if (B_neigs < nmin_s) { 809 PetscBLASInt B_neigs2; 810 811 B_IU = B_N - B_neigs; 812 B_IL = B_N - nmin_s + 1; 813 if (pcbddc->dbg_flag) { 814 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 815 } 816 if (sub_schurs->is_hermitian) { 817 PetscInt j; 818 for (j=0;j<subset_size;j++) { 819 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 820 } 821 for (j=0;j<subset_size;j++) { 822 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 823 } 824 } else { 825 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 826 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 827 } 828 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 829 #if defined(PETSC_USE_COMPLEX) 830 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 831 #else 832 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 833 #endif 834 ierr = PetscFPTrapPop();CHKERRQ(ierr); 835 B_neigs += B_neigs2; 836 } 837 if (B_ierr) { 838 if (B_ierr < 0 ) { 839 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 840 } else if (B_ierr <= B_N) { 841 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 842 } else { 843 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 844 } 845 } 846 if (pcbddc->dbg_flag) { 847 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 848 for (j=0;j<B_neigs;j++) { 849 if (eigs[j] == 0.0) { 850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 851 } else { 852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 853 } 854 } 855 } 856 } else { 857 /* TODO */ 858 } 859 } 860 maxneigs = PetscMax(B_neigs,maxneigs); 861 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 862 if (B_neigs) { 863 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 864 865 if (pcbddc->dbg_flag > 1) { 866 PetscInt ii; 867 for (ii=0;ii<B_neigs;ii++) { 868 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 869 for (j=0;j<B_N;j++) { 870 #if defined(PETSC_USE_COMPLEX) 871 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 872 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 873 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 874 #else 875 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 876 #endif 877 } 878 } 879 } 880 #if 0 881 for (j=0;j<B_neigs;j++) { 882 PetscBLASInt Blas_N,Blas_one = 1.0; 883 PetscScalar norm; 884 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 885 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 886 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 887 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 888 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 889 } else { 890 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 891 } 892 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 893 } 894 #endif 895 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 896 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 897 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 898 cum++; 899 } 900 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 901 /* shift for next computation */ 902 cumarray += subset_size*subset_size; 903 } 904 if (pcbddc->dbg_flag) { 905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 906 } 907 908 if (mss) { 909 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 910 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 911 /* destroy matrices (junk) */ 912 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 913 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 914 } 915 if (allocated_S_St) { 916 ierr = PetscFree2(S,St);CHKERRQ(ierr); 917 } 918 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 919 #if defined(PETSC_USE_COMPLEX) 920 ierr = PetscFree(rwork);CHKERRQ(ierr); 921 #endif 922 if (pcbddc->dbg_flag) { 923 PetscInt maxneigs_r; 924 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 925 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 926 } 927 PetscFunctionReturn(0); 928 } 929 930 #undef __FUNCT__ 931 #define __FUNCT__ "PCBDDCSetUpSolvers" 932 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 933 { 934 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 935 PetscScalar *coarse_submat_vals; 936 PetscErrorCode ierr; 937 938 PetscFunctionBegin; 939 /* Setup local scatters R_to_B and (optionally) R_to_D */ 940 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 941 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 942 943 /* Setup local neumann solver ksp_R */ 944 /* PCBDDCSetUpLocalScatters should be called first! */ 945 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 946 947 /* Change global null space passed in by the user if change of basis has been requested */ 948 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 949 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 950 } 951 952 /* 953 Setup local correction and local part of coarse basis. 954 Gives back the dense local part of the coarse matrix in column major ordering 955 */ 956 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 957 958 /* Compute total number of coarse nodes and setup coarse solver */ 959 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 960 961 /* free */ 962 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "PCBDDCResetCustomization" 968 PetscErrorCode PCBDDCResetCustomization(PC pc) 969 { 970 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 975 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 976 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 977 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 978 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 979 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 980 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 981 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 982 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 983 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 984 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 985 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "PCBDDCResetTopography" 991 PetscErrorCode PCBDDCResetTopography(PC pc) 992 { 993 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 994 PetscInt i; 995 PetscErrorCode ierr; 996 997 PetscFunctionBegin; 998 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 999 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1000 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1001 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 1002 for (i=0;i<pcbddc->n_local_subs;i++) { 1003 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1004 } 1005 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1006 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "PCBDDCResetSolvers" 1012 PetscErrorCode PCBDDCResetSolvers(PC pc) 1013 { 1014 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1015 PetscScalar *array; 1016 PetscErrorCode ierr; 1017 1018 PetscFunctionBegin; 1019 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 1020 if (pcbddc->coarse_phi_B) { 1021 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 1022 ierr = PetscFree(array);CHKERRQ(ierr); 1023 } 1024 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1025 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1026 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1027 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1028 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1029 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1030 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1031 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1032 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1033 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1034 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1035 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1036 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1037 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1038 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1039 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1040 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1041 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1042 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1043 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1044 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1045 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1046 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1047 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1048 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1049 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1050 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1051 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1052 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1053 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1059 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1060 { 1061 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1062 PC_IS *pcis = (PC_IS*)pc->data; 1063 VecType impVecType; 1064 PetscInt n_constraints,n_R,old_size; 1065 PetscErrorCode ierr; 1066 1067 PetscFunctionBegin; 1068 if (!pcbddc->ConstraintMatrix) { 1069 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1070 } 1071 /* get sizes */ 1072 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1073 n_R = pcis->n - pcbddc->n_vertices; 1074 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1075 /* local work vectors (try to avoid unneeded work)*/ 1076 /* R nodes */ 1077 old_size = -1; 1078 if (pcbddc->vec1_R) { 1079 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1080 } 1081 if (n_R != old_size) { 1082 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1083 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1084 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1085 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1086 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1087 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1088 } 1089 /* local primal dofs */ 1090 old_size = -1; 1091 if (pcbddc->vec1_P) { 1092 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1093 } 1094 if (pcbddc->local_primal_size != old_size) { 1095 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1096 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1097 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1098 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1099 } 1100 /* local explicit constraints */ 1101 old_size = -1; 1102 if (pcbddc->vec1_C) { 1103 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1104 } 1105 if (n_constraints && n_constraints != old_size) { 1106 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1107 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1108 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1109 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1110 } 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNCT__ 1115 #define __FUNCT__ "PCBDDCSetUpCorrection" 1116 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1117 { 1118 PetscErrorCode ierr; 1119 /* pointers to pcis and pcbddc */ 1120 PC_IS* pcis = (PC_IS*)pc->data; 1121 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1122 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1123 /* submatrices of local problem */ 1124 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1125 /* submatrices of benign trick */ 1126 Mat B0_V = NULL; 1127 /* submatrices of local coarse problem */ 1128 Mat S_VV,S_CV,S_VC,S_CC; 1129 /* working matrices */ 1130 Mat C_CR; 1131 /* additional working stuff */ 1132 PC pc_R; 1133 Mat F; 1134 PetscBool isLU,isCHOL,isILU; 1135 1136 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1137 PetscScalar *work; 1138 PetscInt *idx_V_B; 1139 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1140 PetscInt i,n_R,n_D,n_B; 1141 1142 /* some shortcuts to scalars */ 1143 PetscScalar one=1.0,m_one=-1.0; 1144 1145 PetscFunctionBegin; 1146 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1147 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1148 } 1149 1150 /* Set Non-overlapping dimensions */ 1151 n_vertices = pcbddc->n_vertices; 1152 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1153 n_B = pcis->n_B; 1154 n_D = pcis->n - n_B; 1155 n_R = pcis->n - n_vertices; 1156 1157 /* vertices in boundary numbering */ 1158 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1159 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1160 if (i != n_vertices) { 1161 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1162 } 1163 1164 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1165 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1166 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1167 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1168 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1169 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1170 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1171 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1172 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1173 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1174 1175 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1176 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1177 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1178 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1179 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1180 lda_rhs = n_R; 1181 if (isLU || isILU || isCHOL) { 1182 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1183 } else if (sub_schurs->reuse_mumps) { 1184 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1185 MatFactorType type; 1186 1187 F = reuse_mumps->F; 1188 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1189 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1190 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1191 } else { 1192 F = NULL; 1193 } 1194 1195 /* allocate workspace */ 1196 n = 0; 1197 if (n_constraints) { 1198 n += lda_rhs*n_constraints; 1199 } 1200 if (n_vertices) { 1201 n = PetscMax(2*lda_rhs*n_vertices,n); 1202 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1203 } 1204 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1205 1206 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1207 if (n_constraints) { 1208 Mat M1,M2,M3; 1209 Mat auxmat; 1210 IS is_aux; 1211 PetscScalar *array,*array2; 1212 1213 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1214 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1215 1216 /* Extract constraints on R nodes: C_{CR} */ 1217 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1218 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1219 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1220 1221 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1222 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1223 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1224 for (i=0;i<n_constraints;i++) { 1225 const PetscScalar *row_cmat_values; 1226 const PetscInt *row_cmat_indices; 1227 PetscInt size_of_constraint,j; 1228 1229 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1230 for (j=0;j<size_of_constraint;j++) { 1231 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1232 } 1233 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1234 } 1235 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1236 if (F) { 1237 Mat B; 1238 1239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1240 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1241 ierr = MatDestroy(&B);CHKERRQ(ierr); 1242 } else { 1243 PetscScalar *marr; 1244 1245 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1246 for (i=0;i<n_constraints;i++) { 1247 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1248 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1249 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1250 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1251 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1252 } 1253 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1254 } 1255 if (!pcbddc->switch_static) { 1256 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1257 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1258 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1259 for (i=0;i<n_constraints;i++) { 1260 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1261 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1264 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1265 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1266 } 1267 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1268 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1269 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1270 } else { 1271 if (lda_rhs != n_R) { 1272 IS dummy; 1273 1274 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1275 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1276 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1277 } else { 1278 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1279 pcbddc->local_auxmat2 = local_auxmat2_R; 1280 } 1281 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1282 } 1283 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1284 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1285 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1286 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1287 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1288 if (isCHOL) { 1289 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1290 } else { 1291 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1292 } 1293 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1294 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1295 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1296 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1297 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1298 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1299 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1300 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1301 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1302 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1303 } 1304 /* Get submatrices from subdomain matrix */ 1305 if (pcbddc->benign_n) { 1306 IS dummy; 1307 Mat B0_R; 1308 PetscReal norm; 1309 1310 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1311 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1312 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1313 if (norm > PETSC_SMALL) { 1314 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1315 } 1316 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1317 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1318 } 1319 1320 if (n_vertices) { 1321 IS is_aux; 1322 1323 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1324 IS tis; 1325 1326 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1327 ierr = ISSort(tis);CHKERRQ(ierr); 1328 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1329 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1330 } else { 1331 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1332 } 1333 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1334 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1335 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1336 if (pcbddc->benign_n) { 1337 IS dummy; 1338 1339 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1340 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1341 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1342 } 1343 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1344 } 1345 1346 /* Matrix of coarse basis functions (local) */ 1347 if (pcbddc->coarse_phi_B) { 1348 PetscInt on_B,on_primal,on_D=n_D; 1349 if (pcbddc->coarse_phi_D) { 1350 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1351 } 1352 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1353 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1354 PetscScalar *marray; 1355 1356 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1357 ierr = PetscFree(marray);CHKERRQ(ierr); 1358 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1359 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1360 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1361 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1362 } 1363 } 1364 1365 if (!pcbddc->coarse_phi_B) { 1366 PetscScalar *marray; 1367 1368 n = n_B*pcbddc->local_primal_size; 1369 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1370 n += n_D*pcbddc->local_primal_size; 1371 } 1372 if (!pcbddc->symmetric_primal) { 1373 n *= 2; 1374 } 1375 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1376 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1377 n = n_B*pcbddc->local_primal_size; 1378 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1379 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1380 n += n_D*pcbddc->local_primal_size; 1381 } 1382 if (!pcbddc->symmetric_primal) { 1383 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1384 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1385 n = n_B*pcbddc->local_primal_size; 1386 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1387 } 1388 } else { 1389 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1390 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1391 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1392 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1393 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1394 } 1395 } 1396 } 1397 1398 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1399 p0_lidx_I = NULL; 1400 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1401 const PetscInt *idxs; 1402 1403 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1404 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1405 for (i=0;i<pcbddc->benign_n;i++) { 1406 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1407 } 1408 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1409 } 1410 1411 /* vertices */ 1412 if (n_vertices) { 1413 1414 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1415 1416 if (n_R) { 1417 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1418 PetscBLASInt B_N,B_one = 1; 1419 PetscScalar *x,*y; 1420 PetscBool isseqaij; 1421 1422 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1423 if (lda_rhs == n_R) { 1424 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1425 } else { 1426 Mat A_RVt; 1427 PetscScalar *array; 1428 1429 /* TODO optimize */ 1430 ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr); 1431 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1432 ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1433 ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr); 1434 for (i=0;i<n_vertices;i++) { 1435 ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr); 1436 } 1437 ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr); 1438 ierr = MatDestroy(&A_RVt);CHKERRQ(ierr); 1439 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr); 1440 } 1441 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1442 if (F) { /* TODO could be optimized for symmetric problems */ 1443 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1444 } else { 1445 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1446 for (i=0;i<n_vertices;i++) { 1447 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1448 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1449 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1450 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1451 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1452 } 1453 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1454 } 1455 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1456 /* S_VV and S_CV */ 1457 if (n_constraints) { 1458 Mat B; 1459 1460 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1461 for (i=0;i<n_vertices;i++) { 1462 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1463 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1464 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1465 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1467 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1468 } 1469 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1470 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1471 ierr = MatDestroy(&B);CHKERRQ(ierr); 1472 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1473 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1474 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1475 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1476 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1477 ierr = MatDestroy(&B);CHKERRQ(ierr); 1478 } 1479 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1480 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1481 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1482 } 1483 if (lda_rhs != n_R) { 1484 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1485 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1486 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1487 } 1488 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1489 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1490 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1491 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1492 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1493 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1494 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1495 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1496 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1497 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1498 } else { 1499 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1500 } 1501 for (i=0;i<pcbddc->benign_n;i++) { 1502 const PetscScalar *vals; 1503 const PetscInt *idxs; 1504 PetscInt n,j,primal_idx; 1505 1506 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1507 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1508 for (j=0;j<n;j++) { 1509 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1510 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1511 } 1512 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1513 } 1514 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1515 1516 /* coarse basis functions */ 1517 for (i=0;i<n_vertices;i++) { 1518 PetscScalar *y; 1519 1520 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1521 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1522 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1523 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1525 y[n_B*i+idx_V_B[i]] = 1.0; 1526 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1527 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1528 1529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1530 PetscInt j; 1531 1532 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1533 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1534 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1536 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1537 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1538 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1539 } 1540 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1541 } 1542 /* if n_R == 0 the object is not destroyed */ 1543 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1544 } 1545 1546 if (n_constraints) { 1547 Mat B; 1548 1549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1550 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1551 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1552 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1553 if (n_vertices) { 1554 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1555 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1556 } else { 1557 Mat S_VCt; 1558 1559 if (lda_rhs != n_R) { 1560 ierr = MatDestroy(&B);CHKERRQ(ierr); 1561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1562 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1563 } 1564 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1565 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1566 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1567 } 1568 } 1569 ierr = MatDestroy(&B);CHKERRQ(ierr); 1570 /* coarse basis functions */ 1571 for (i=0;i<n_constraints;i++) { 1572 PetscScalar *y; 1573 1574 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1575 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1576 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1577 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1578 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1580 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1581 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1582 PetscInt j; 1583 1584 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1585 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1586 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1589 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1590 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1591 } 1592 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1593 } 1594 } 1595 if (n_constraints) { 1596 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1597 } 1598 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1599 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1600 1601 /* compute other basis functions for non-symmetric problems */ 1602 if (!pcbddc->symmetric_primal) { 1603 Mat B_V=NULL,B_C=NULL; 1604 PetscScalar *marray; 1605 1606 if (n_constraints) { 1607 Mat S_CCT,C_CRT; 1608 1609 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1610 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1611 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1612 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1613 if (n_vertices) { 1614 Mat S_VCT; 1615 1616 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1617 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1618 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1619 } 1620 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 1621 } 1622 if (n_vertices && n_R) { 1623 PetscScalar *av,*marray; 1624 const PetscInt *xadj,*adjncy; 1625 PetscInt n; 1626 PetscBool flg_row; 1627 1628 /* B_V = B_V - A_VR^T */ 1629 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1630 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1631 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 1632 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1633 for (i=0;i<n;i++) { 1634 PetscInt j; 1635 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 1636 } 1637 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1638 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1639 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1640 } 1641 1642 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1643 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1644 for (i=0;i<n_vertices;i++) { 1645 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 1646 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1647 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1648 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1649 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1650 } 1651 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1652 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 1653 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 1654 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 1655 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1656 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1658 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1659 } 1660 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 1661 /* coarse basis functions */ 1662 for (i=0;i<pcbddc->local_primal_size;i++) { 1663 PetscScalar *y; 1664 1665 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1666 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1667 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1668 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1669 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1670 if (i<n_vertices) { 1671 y[n_B*i+idx_V_B[i]] = 1.0; 1672 } 1673 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1674 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1675 1676 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1677 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1678 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1679 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1680 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1681 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1682 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1683 } 1684 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1685 } 1686 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1687 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1688 } 1689 /* free memory */ 1690 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1691 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1692 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1693 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1694 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1695 ierr = PetscFree(work);CHKERRQ(ierr); 1696 if (n_vertices) { 1697 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1698 } 1699 if (n_constraints) { 1700 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1701 } 1702 /* Checking coarse_sub_mat and coarse basis functios */ 1703 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1704 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1705 if (pcbddc->dbg_flag) { 1706 Mat coarse_sub_mat; 1707 Mat AUXMAT,TM1,TM2,TM3,TM4; 1708 Mat coarse_phi_D,coarse_phi_B; 1709 Mat coarse_psi_D,coarse_psi_B; 1710 Mat A_II,A_BB,A_IB,A_BI; 1711 Mat C_B,CPHI; 1712 IS is_dummy; 1713 Vec mones; 1714 MatType checkmattype=MATSEQAIJ; 1715 PetscReal real_value; 1716 1717 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1718 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1719 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1720 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1721 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1722 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1723 if (!pcbddc->symmetric_primal) { 1724 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1725 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1726 } 1727 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1728 1729 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1730 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1731 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1732 if (!pcbddc->symmetric_primal) { 1733 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1734 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1735 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1736 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1737 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1738 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1739 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1740 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1741 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1742 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1743 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1744 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1745 } else { 1746 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1747 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1748 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1749 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1750 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1751 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1752 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1753 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1754 } 1755 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1756 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1757 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1758 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1759 if (pcbddc->benign_n) { 1760 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1761 PetscScalar *data,*data2; 1762 PetscInt j; 1763 1764 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1765 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1766 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1767 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1768 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1769 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1770 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1771 for (j=0;j<pcbddc->benign_n;j++) { 1772 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1773 for (i=0;i<pcbddc->local_primal_size;i++) { 1774 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1775 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1776 } 1777 } 1778 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1779 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1780 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1781 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1782 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1783 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1784 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1785 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1786 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1787 } 1788 #if 0 1789 { 1790 PetscViewer viewer; 1791 char filename[256]; 1792 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 1793 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1794 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1795 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 1796 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 1797 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 1798 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1799 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 1800 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 1801 if (pcbddc->coarse_phi_D) { 1802 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 1803 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 1804 } 1805 if (pcbddc->coarse_psi_B) { 1806 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 1807 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 1808 } 1809 if (pcbddc->coarse_phi_B) { 1810 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 1811 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 1812 } 1813 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1814 } 1815 #endif 1816 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1817 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1818 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1819 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1820 1821 /* check constraints */ 1822 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1823 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1824 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1825 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1826 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1827 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1828 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1829 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1830 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1831 if (!pcbddc->symmetric_primal) { 1832 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1833 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1834 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1835 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1836 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1837 } 1838 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1839 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1840 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1841 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1842 } 1843 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1844 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1845 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1846 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1847 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1848 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1849 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1850 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1851 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1852 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1853 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1854 if (!pcbddc->symmetric_primal) { 1855 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1856 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1857 } 1858 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1859 } 1860 /* get back data */ 1861 *coarse_submat_vals_n = coarse_submat_vals; 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1867 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1868 { 1869 Mat *work_mat; 1870 IS isrow_s,iscol_s; 1871 PetscBool rsorted,csorted; 1872 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1873 PetscErrorCode ierr; 1874 1875 PetscFunctionBegin; 1876 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1877 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1878 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1879 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1880 1881 if (!rsorted) { 1882 const PetscInt *idxs; 1883 PetscInt *idxs_sorted,i; 1884 1885 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1886 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1887 for (i=0;i<rsize;i++) { 1888 idxs_perm_r[i] = i; 1889 } 1890 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1891 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1892 for (i=0;i<rsize;i++) { 1893 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1894 } 1895 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1896 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1897 } else { 1898 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1899 isrow_s = isrow; 1900 } 1901 1902 if (!csorted) { 1903 if (isrow == iscol) { 1904 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1905 iscol_s = isrow_s; 1906 } else { 1907 const PetscInt *idxs; 1908 PetscInt *idxs_sorted,i; 1909 1910 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1911 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1912 for (i=0;i<csize;i++) { 1913 idxs_perm_c[i] = i; 1914 } 1915 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1916 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1917 for (i=0;i<csize;i++) { 1918 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1919 } 1920 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1921 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1922 } 1923 } else { 1924 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1925 iscol_s = iscol; 1926 } 1927 1928 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1929 1930 if (!rsorted || !csorted) { 1931 Mat new_mat; 1932 IS is_perm_r,is_perm_c; 1933 1934 if (!rsorted) { 1935 PetscInt *idxs_r,i; 1936 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1937 for (i=0;i<rsize;i++) { 1938 idxs_r[idxs_perm_r[i]] = i; 1939 } 1940 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1941 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1942 } else { 1943 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1944 } 1945 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1946 1947 if (!csorted) { 1948 if (isrow_s == iscol_s) { 1949 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1950 is_perm_c = is_perm_r; 1951 } else { 1952 PetscInt *idxs_c,i; 1953 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1954 for (i=0;i<csize;i++) { 1955 idxs_c[idxs_perm_c[i]] = i; 1956 } 1957 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1958 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1959 } 1960 } else { 1961 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1962 } 1963 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1964 1965 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1966 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1967 work_mat[0] = new_mat; 1968 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1969 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1970 } 1971 1972 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1973 *B = work_mat[0]; 1974 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1975 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1976 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1982 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1983 { 1984 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1985 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1986 Mat new_mat; 1987 IS is_local,is_global; 1988 PetscInt local_size; 1989 PetscBool isseqaij; 1990 PetscErrorCode ierr; 1991 1992 PetscFunctionBegin; 1993 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1994 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1995 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1996 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1997 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1998 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1999 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 2000 2001 /* check */ 2002 if (pcbddc->dbg_flag) { 2003 Vec x,x_change; 2004 PetscReal error; 2005 2006 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 2007 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 2008 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 2009 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2010 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2011 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 2012 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2013 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2014 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2015 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2016 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2017 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2018 ierr = VecDestroy(&x);CHKERRQ(ierr); 2019 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2020 } 2021 2022 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2023 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2024 if (isseqaij) { 2025 Mat M; 2026 2027 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2028 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2029 ierr = MatDestroy(&M);CHKERRQ(ierr); 2030 } else { 2031 Mat work_mat,M; 2032 2033 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2034 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2035 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2036 ierr = MatDestroy(&M);CHKERRQ(ierr); 2037 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2038 } 2039 if (matis->A->symmetric_set) { 2040 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2041 #if !defined(PETSC_USE_COMPLEX) 2042 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2043 #endif 2044 } 2045 /* 2046 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2047 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2048 */ 2049 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2050 PetscFunctionReturn(0); 2051 } 2052 2053 #undef __FUNCT__ 2054 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2055 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2056 { 2057 PC_IS* pcis = (PC_IS*)(pc->data); 2058 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2059 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2060 PetscInt *idx_R_local=NULL; 2061 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2062 PetscInt vbs,bs; 2063 PetscBT bitmask=NULL; 2064 PetscErrorCode ierr; 2065 2066 PetscFunctionBegin; 2067 /* 2068 No need to setup local scatters if 2069 - primal space is unchanged 2070 AND 2071 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2072 AND 2073 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2074 */ 2075 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2076 PetscFunctionReturn(0); 2077 } 2078 /* destroy old objects */ 2079 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2080 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2081 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2082 /* Set Non-overlapping dimensions */ 2083 n_B = pcis->n_B; 2084 n_D = pcis->n - n_B; 2085 n_vertices = pcbddc->n_vertices; 2086 2087 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2088 2089 /* create auxiliary bitmask and allocate workspace */ 2090 if (!sub_schurs->reuse_mumps) { 2091 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2092 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2093 for (i=0;i<n_vertices;i++) { 2094 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2095 } 2096 2097 for (i=0, n_R=0; i<pcis->n; i++) { 2098 if (!PetscBTLookup(bitmask,i)) { 2099 idx_R_local[n_R++] = i; 2100 } 2101 } 2102 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2103 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2104 2105 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2106 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2107 } 2108 2109 /* Block code */ 2110 vbs = 1; 2111 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2112 if (bs>1 && !(n_vertices%bs)) { 2113 PetscBool is_blocked = PETSC_TRUE; 2114 PetscInt *vary; 2115 if (!sub_schurs->reuse_mumps) { 2116 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2117 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2118 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2119 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2120 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2121 for (i=0; i<pcis->n/bs; i++) { 2122 if (vary[i]!=0 && vary[i]!=bs) { 2123 is_blocked = PETSC_FALSE; 2124 break; 2125 } 2126 } 2127 ierr = PetscFree(vary);CHKERRQ(ierr); 2128 } else { 2129 /* Verify directly the R set */ 2130 for (i=0; i<n_R/bs; i++) { 2131 PetscInt j,node=idx_R_local[bs*i]; 2132 for (j=1; j<bs; j++) { 2133 if (node != idx_R_local[bs*i+j]-j) { 2134 is_blocked = PETSC_FALSE; 2135 break; 2136 } 2137 } 2138 } 2139 } 2140 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2141 vbs = bs; 2142 for (i=0;i<n_R/vbs;i++) { 2143 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2144 } 2145 } 2146 } 2147 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2148 if (sub_schurs->reuse_mumps) { 2149 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2150 2151 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2152 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2153 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2154 reuse_mumps->is_R = pcbddc->is_R_local; 2155 } else { 2156 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2157 } 2158 2159 /* print some info if requested */ 2160 if (pcbddc->dbg_flag) { 2161 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2162 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2163 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2166 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2167 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2168 } 2169 2170 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2171 if (!sub_schurs->reuse_mumps) { 2172 IS is_aux1,is_aux2; 2173 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2174 2175 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2176 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2177 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2178 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2179 for (i=0; i<n_D; i++) { 2180 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2181 } 2182 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2183 for (i=0, j=0; i<n_R; i++) { 2184 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2185 aux_array1[j++] = i; 2186 } 2187 } 2188 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2189 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2190 for (i=0, j=0; i<n_B; i++) { 2191 if (!PetscBTLookup(bitmask,is_indices[i])) { 2192 aux_array2[j++] = i; 2193 } 2194 } 2195 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2196 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2197 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2198 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2199 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2200 2201 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2202 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2203 for (i=0, j=0; i<n_R; i++) { 2204 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2205 aux_array1[j++] = i; 2206 } 2207 } 2208 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2209 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2210 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2211 } 2212 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2213 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2214 } else { 2215 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2216 IS tis; 2217 PetscInt schur_size; 2218 2219 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2220 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2221 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2222 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2223 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2224 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2225 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2226 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2227 } 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2235 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2236 { 2237 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2238 PC_IS *pcis = (PC_IS*)pc->data; 2239 PC pc_temp; 2240 Mat A_RR; 2241 MatReuse reuse; 2242 PetscScalar m_one = -1.0; 2243 PetscReal value; 2244 PetscInt n_D,n_R; 2245 PetscBool use_exact,use_exact_reduced,issbaij; 2246 PetscErrorCode ierr; 2247 /* prefixes stuff */ 2248 char dir_prefix[256],neu_prefix[256],str_level[16]; 2249 size_t len; 2250 2251 PetscFunctionBegin; 2252 2253 /* compute prefixes */ 2254 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2255 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2256 if (!pcbddc->current_level) { 2257 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2258 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2259 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2260 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2261 } else { 2262 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2263 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2264 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2265 len -= 15; /* remove "pc_bddc_coarse_" */ 2266 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2267 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2268 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2269 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2270 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2271 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2272 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2273 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2274 } 2275 2276 /* DIRICHLET PROBLEM */ 2277 if (dirichlet) { 2278 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2279 if (pcbddc->local_mat->symmetric_set) { 2280 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2281 } 2282 /* Matrix for Dirichlet problem is pcis->A_II */ 2283 n_D = pcis->n - pcis->n_B; 2284 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2285 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2286 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2287 /* default */ 2288 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2289 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2290 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2291 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2292 if (issbaij) { 2293 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2294 } else { 2295 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2296 } 2297 /* Allow user's customization */ 2298 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2299 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2300 } 2301 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2302 if (sub_schurs->reuse_mumps) { 2303 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2304 2305 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2306 } 2307 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2308 if (!n_D) { 2309 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2310 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2311 } 2312 /* Set Up KSP for Dirichlet problem of BDDC */ 2313 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2314 /* set ksp_D into pcis data */ 2315 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2316 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2317 pcis->ksp_D = pcbddc->ksp_D; 2318 } 2319 2320 /* NEUMANN PROBLEM */ 2321 A_RR = 0; 2322 if (neumann) { 2323 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2324 PetscInt ibs,mbs; 2325 PetscBool issbaij; 2326 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2327 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2328 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2329 if (pcbddc->ksp_R) { /* already created ksp */ 2330 PetscInt nn_R; 2331 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2332 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2333 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2334 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2335 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2336 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2337 reuse = MAT_INITIAL_MATRIX; 2338 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2339 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2340 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2341 reuse = MAT_INITIAL_MATRIX; 2342 } else { /* safe to reuse the matrix */ 2343 reuse = MAT_REUSE_MATRIX; 2344 } 2345 } 2346 /* last check */ 2347 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2348 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2349 reuse = MAT_INITIAL_MATRIX; 2350 } 2351 } else { /* first time, so we need to create the matrix */ 2352 reuse = MAT_INITIAL_MATRIX; 2353 } 2354 /* extract A_RR */ 2355 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2356 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2357 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2358 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2359 if (matis->A == pcbddc->local_mat) { 2360 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2361 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2362 } else { 2363 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2364 } 2365 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2366 if (matis->A == pcbddc->local_mat) { 2367 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2368 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2369 } else { 2370 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2371 } 2372 } 2373 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2374 if (pcbddc->local_mat->symmetric_set) { 2375 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2376 } 2377 if (!pcbddc->ksp_R) { /* create object if not present */ 2378 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2379 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2380 /* default */ 2381 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2382 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2383 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2384 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2385 if (issbaij) { 2386 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2387 } else { 2388 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2389 } 2390 /* Allow user's customization */ 2391 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2392 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2393 } 2394 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2395 if (!n_R) { 2396 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2397 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2398 } 2399 /* Reuse MUMPS solver if it is present */ 2400 if (sub_schurs->reuse_mumps) { 2401 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2402 2403 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2404 } 2405 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2406 /* Set Up KSP for Neumann problem of BDDC */ 2407 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2408 } 2409 /* free Neumann problem's matrix */ 2410 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2411 2412 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2413 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2414 if (pcbddc->dbg_flag) { 2415 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2416 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2417 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2418 } 2419 if (dirichlet) { /* Dirichlet */ 2420 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2421 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2422 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2423 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2424 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2425 /* need to be adapted? */ 2426 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2427 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2428 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2429 /* print info */ 2430 if (pcbddc->dbg_flag) { 2431 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2432 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2433 } 2434 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2435 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2436 } 2437 } 2438 if (neumann) { /* Neumann */ 2439 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2440 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2441 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2442 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2443 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2444 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2445 /* need to be adapted? */ 2446 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2447 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2448 /* print info */ 2449 if (pcbddc->dbg_flag) { 2450 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2451 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2452 } 2453 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2454 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2455 } 2456 } 2457 } 2458 PetscFunctionReturn(0); 2459 } 2460 2461 #undef __FUNCT__ 2462 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2463 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2464 { 2465 PetscErrorCode ierr; 2466 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2467 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2468 2469 PetscFunctionBegin; 2470 if (!sub_schurs->reuse_mumps) { 2471 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2472 } 2473 if (!pcbddc->switch_static) { 2474 if (applytranspose && pcbddc->local_auxmat1) { 2475 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2476 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2477 } 2478 if (!sub_schurs->reuse_mumps) { 2479 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2480 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2481 } else { 2482 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2483 2484 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2485 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2486 } 2487 } else { 2488 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2489 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2490 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2491 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2492 if (applytranspose && pcbddc->local_auxmat1) { 2493 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2494 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2495 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2496 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2497 } 2498 } 2499 if (!sub_schurs->reuse_mumps) { 2500 if (applytranspose) { 2501 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2502 } else { 2503 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2504 } 2505 #if defined(PETSC_HAVE_MUMPS) 2506 } else { 2507 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2508 2509 if (applytranspose) { 2510 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2511 } else { 2512 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2513 } 2514 #endif 2515 } 2516 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2517 if (!pcbddc->switch_static) { 2518 if (!sub_schurs->reuse_mumps) { 2519 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2520 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2521 } else { 2522 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2523 2524 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2525 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2526 } 2527 if (!applytranspose && pcbddc->local_auxmat1) { 2528 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2529 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2530 } 2531 } else { 2532 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2533 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2534 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2535 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2536 if (!applytranspose && pcbddc->local_auxmat1) { 2537 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2538 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2539 } 2540 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2541 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2542 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2543 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2544 } 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2549 #undef __FUNCT__ 2550 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2551 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2552 { 2553 PetscErrorCode ierr; 2554 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2555 PC_IS* pcis = (PC_IS*) (pc->data); 2556 const PetscScalar zero = 0.0; 2557 2558 PetscFunctionBegin; 2559 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2560 if (applytranspose) { 2561 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2562 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2563 } else { 2564 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2565 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2566 } 2567 2568 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2569 if (pcbddc->benign_n) { 2570 PetscScalar *array; 2571 PetscInt j; 2572 2573 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2574 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2575 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2576 } 2577 2578 /* start communications from local primal nodes to rhs of coarse solver */ 2579 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2580 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2581 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2582 2583 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2584 /* TODO remove null space when doing multilevel */ 2585 if (pcbddc->coarse_ksp) { 2586 Mat coarse_mat; 2587 Vec rhs,sol; 2588 MatNullSpace nullsp; 2589 2590 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2591 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2592 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2593 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2594 if (nullsp) { 2595 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2596 } 2597 if (applytranspose) { 2598 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2599 } else { 2600 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2601 } 2602 if (nullsp) { 2603 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2604 } 2605 } 2606 2607 /* Local solution on R nodes */ 2608 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2609 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2610 } 2611 2612 /* communications from coarse sol to local primal nodes */ 2613 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2614 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2615 2616 /* Sum contributions from two levels */ 2617 if (applytranspose) { 2618 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2619 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2620 } else { 2621 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2622 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2623 } 2624 /* store p0 */ 2625 if (pcbddc->benign_n) { 2626 PetscScalar *array; 2627 PetscInt j; 2628 2629 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2630 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2631 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2632 } 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2638 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2639 { 2640 PetscErrorCode ierr; 2641 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2642 PetscScalar *array; 2643 Vec from,to; 2644 2645 PetscFunctionBegin; 2646 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2647 from = pcbddc->coarse_vec; 2648 to = pcbddc->vec1_P; 2649 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2650 Vec tvec; 2651 2652 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2653 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2654 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2655 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2656 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2657 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2658 } 2659 } else { /* from local to global -> put data in coarse right hand side */ 2660 from = pcbddc->vec1_P; 2661 to = pcbddc->coarse_vec; 2662 } 2663 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2664 PetscFunctionReturn(0); 2665 } 2666 2667 #undef __FUNCT__ 2668 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2669 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2670 { 2671 PetscErrorCode ierr; 2672 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2673 PetscScalar *array; 2674 Vec from,to; 2675 2676 PetscFunctionBegin; 2677 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2678 from = pcbddc->coarse_vec; 2679 to = pcbddc->vec1_P; 2680 } else { /* from local to global -> put data in coarse right hand side */ 2681 from = pcbddc->vec1_P; 2682 to = pcbddc->coarse_vec; 2683 } 2684 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2685 if (smode == SCATTER_FORWARD) { 2686 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2687 Vec tvec; 2688 2689 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2690 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2691 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2692 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2693 } 2694 } else { 2695 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2696 ierr = VecResetArray(from);CHKERRQ(ierr); 2697 } 2698 } 2699 PetscFunctionReturn(0); 2700 } 2701 2702 /* uncomment for testing purposes */ 2703 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2706 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2707 { 2708 PetscErrorCode ierr; 2709 PC_IS* pcis = (PC_IS*)(pc->data); 2710 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2711 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2712 /* one and zero */ 2713 PetscScalar one=1.0,zero=0.0; 2714 /* space to store constraints and their local indices */ 2715 PetscScalar *constraints_data; 2716 PetscInt *constraints_idxs,*constraints_idxs_B; 2717 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2718 PetscInt *constraints_n; 2719 /* iterators */ 2720 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2721 /* BLAS integers */ 2722 PetscBLASInt lwork,lierr; 2723 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2724 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2725 /* reuse */ 2726 PetscInt olocal_primal_size,olocal_primal_size_cc; 2727 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2728 /* change of basis */ 2729 PetscBool qr_needed; 2730 PetscBT change_basis,qr_needed_idx; 2731 /* auxiliary stuff */ 2732 PetscInt *nnz,*is_indices; 2733 PetscInt ncc; 2734 /* some quantities */ 2735 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2736 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2737 2738 PetscFunctionBegin; 2739 /* Destroy Mat objects computed previously */ 2740 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2741 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2742 /* save info on constraints from previous setup (if any) */ 2743 olocal_primal_size = pcbddc->local_primal_size; 2744 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2745 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2746 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2747 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2748 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2749 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2750 2751 /* print some info */ 2752 if (pcbddc->dbg_flag) { 2753 IS vertices; 2754 PetscInt nv,nedges,nfaces; 2755 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2756 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2757 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2758 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2759 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2760 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2761 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2762 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2764 } 2765 2766 if (!pcbddc->adaptive_selection) { 2767 IS ISForVertices,*ISForFaces,*ISForEdges; 2768 MatNullSpace nearnullsp; 2769 const Vec *nearnullvecs; 2770 Vec *localnearnullsp; 2771 PetscScalar *array; 2772 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2773 PetscBool nnsp_has_cnst; 2774 /* LAPACK working arrays for SVD or POD */ 2775 PetscBool skip_lapack,boolforchange; 2776 PetscScalar *work; 2777 PetscReal *singular_vals; 2778 #if defined(PETSC_USE_COMPLEX) 2779 PetscReal *rwork; 2780 #endif 2781 #if defined(PETSC_MISSING_LAPACK_GESVD) 2782 PetscScalar *temp_basis,*correlation_mat; 2783 #else 2784 PetscBLASInt dummy_int=1; 2785 PetscScalar dummy_scalar=1.; 2786 #endif 2787 2788 /* Get index sets for faces, edges and vertices from graph */ 2789 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2790 /* free unneeded index sets */ 2791 if (!pcbddc->use_vertices) { 2792 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2793 } 2794 if (!pcbddc->use_edges) { 2795 for (i=0;i<n_ISForEdges;i++) { 2796 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2797 } 2798 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2799 n_ISForEdges = 0; 2800 } 2801 if (!pcbddc->use_faces) { 2802 for (i=0;i<n_ISForFaces;i++) { 2803 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2804 } 2805 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2806 n_ISForFaces = 0; 2807 } 2808 2809 #if defined(PETSC_USE_DEBUG) 2810 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2811 Also use_change_of_basis should be consistent among processors */ 2812 if (pcbddc->NullSpace) { 2813 PetscBool tbool[2],gbool[2]; 2814 2815 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2816 pcbddc->use_change_of_basis = PETSC_TRUE; 2817 if (!ISForEdges) { 2818 pcbddc->use_change_on_faces = PETSC_TRUE; 2819 } 2820 } 2821 tbool[0] = pcbddc->use_change_of_basis; 2822 tbool[1] = pcbddc->use_change_on_faces; 2823 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2824 pcbddc->use_change_of_basis = gbool[0]; 2825 pcbddc->use_change_on_faces = gbool[1]; 2826 } 2827 #endif 2828 2829 /* check if near null space is attached to global mat */ 2830 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2831 if (nearnullsp) { 2832 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2833 /* remove any stored info */ 2834 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2835 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2836 /* store information for BDDC solver reuse */ 2837 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2838 pcbddc->onearnullspace = nearnullsp; 2839 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2840 for (i=0;i<nnsp_size;i++) { 2841 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2842 } 2843 } else { /* if near null space is not provided BDDC uses constants by default */ 2844 nnsp_size = 0; 2845 nnsp_has_cnst = PETSC_TRUE; 2846 } 2847 /* get max number of constraints on a single cc */ 2848 max_constraints = nnsp_size; 2849 if (nnsp_has_cnst) max_constraints++; 2850 2851 /* 2852 Evaluate maximum storage size needed by the procedure 2853 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2854 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2855 There can be multiple constraints per connected component 2856 */ 2857 n_vertices = 0; 2858 if (ISForVertices) { 2859 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2860 } 2861 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2862 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2863 2864 total_counts = n_ISForFaces+n_ISForEdges; 2865 total_counts *= max_constraints; 2866 total_counts += n_vertices; 2867 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2868 2869 total_counts = 0; 2870 max_size_of_constraint = 0; 2871 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2872 IS used_is; 2873 if (i<n_ISForEdges) { 2874 used_is = ISForEdges[i]; 2875 } else { 2876 used_is = ISForFaces[i-n_ISForEdges]; 2877 } 2878 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2879 total_counts += j; 2880 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2881 } 2882 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2883 2884 /* get local part of global near null space vectors */ 2885 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2886 for (k=0;k<nnsp_size;k++) { 2887 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2888 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2889 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2890 } 2891 2892 /* whether or not to skip lapack calls */ 2893 skip_lapack = PETSC_TRUE; 2894 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2895 2896 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2897 if (!skip_lapack) { 2898 PetscScalar temp_work; 2899 2900 #if defined(PETSC_MISSING_LAPACK_GESVD) 2901 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2902 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2903 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2905 #if defined(PETSC_USE_COMPLEX) 2906 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2907 #endif 2908 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2909 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2910 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2911 lwork = -1; 2912 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2913 #if !defined(PETSC_USE_COMPLEX) 2914 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2915 #else 2916 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2917 #endif 2918 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2919 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2920 #else /* on missing GESVD */ 2921 /* SVD */ 2922 PetscInt max_n,min_n; 2923 max_n = max_size_of_constraint; 2924 min_n = max_constraints; 2925 if (max_size_of_constraint < max_constraints) { 2926 min_n = max_size_of_constraint; 2927 max_n = max_constraints; 2928 } 2929 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2930 #if defined(PETSC_USE_COMPLEX) 2931 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2932 #endif 2933 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2934 lwork = -1; 2935 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2936 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2937 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2938 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2939 #if !defined(PETSC_USE_COMPLEX) 2940 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2941 #else 2942 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2943 #endif 2944 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2945 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2946 #endif /* on missing GESVD */ 2947 /* Allocate optimal workspace */ 2948 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2949 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2950 } 2951 /* Now we can loop on constraining sets */ 2952 total_counts = 0; 2953 constraints_idxs_ptr[0] = 0; 2954 constraints_data_ptr[0] = 0; 2955 /* vertices */ 2956 if (n_vertices) { 2957 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2958 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2959 for (i=0;i<n_vertices;i++) { 2960 constraints_n[total_counts] = 1; 2961 constraints_data[total_counts] = 1.0; 2962 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2963 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2964 total_counts++; 2965 } 2966 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2967 n_vertices = total_counts; 2968 } 2969 2970 /* edges and faces */ 2971 total_counts_cc = total_counts; 2972 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2973 IS used_is; 2974 PetscBool idxs_copied = PETSC_FALSE; 2975 2976 if (ncc<n_ISForEdges) { 2977 used_is = ISForEdges[ncc]; 2978 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2979 } else { 2980 used_is = ISForFaces[ncc-n_ISForEdges]; 2981 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2982 } 2983 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2984 2985 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2986 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2987 /* change of basis should not be performed on local periodic nodes */ 2988 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2989 if (nnsp_has_cnst) { 2990 PetscScalar quad_value; 2991 2992 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2993 idxs_copied = PETSC_TRUE; 2994 2995 if (!pcbddc->use_nnsp_true) { 2996 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2997 } else { 2998 quad_value = 1.0; 2999 } 3000 for (j=0;j<size_of_constraint;j++) { 3001 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 3002 } 3003 temp_constraints++; 3004 total_counts++; 3005 } 3006 for (k=0;k<nnsp_size;k++) { 3007 PetscReal real_value; 3008 PetscScalar *ptr_to_data; 3009 3010 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3011 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 3012 for (j=0;j<size_of_constraint;j++) { 3013 ptr_to_data[j] = array[is_indices[j]]; 3014 } 3015 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3016 /* check if array is null on the connected component */ 3017 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3018 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3019 if (real_value > 0.0) { /* keep indices and values */ 3020 temp_constraints++; 3021 total_counts++; 3022 if (!idxs_copied) { 3023 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3024 idxs_copied = PETSC_TRUE; 3025 } 3026 } 3027 } 3028 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3029 valid_constraints = temp_constraints; 3030 if (!pcbddc->use_nnsp_true && temp_constraints) { 3031 if (temp_constraints == 1) { /* just normalize the constraint */ 3032 PetscScalar norm,*ptr_to_data; 3033 3034 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3036 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3037 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3038 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3039 } else { /* perform SVD */ 3040 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3041 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3042 3043 #if defined(PETSC_MISSING_LAPACK_GESVD) 3044 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3045 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3046 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3047 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3048 from that computed using LAPACKgesvd 3049 -> This is due to a different computation of eigenvectors in LAPACKheev 3050 -> The quality of the POD-computed basis will be the same */ 3051 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3052 /* Store upper triangular part of correlation matrix */ 3053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3054 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3055 for (j=0;j<temp_constraints;j++) { 3056 for (k=0;k<j+1;k++) { 3057 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3058 } 3059 } 3060 /* compute eigenvalues and eigenvectors of correlation matrix */ 3061 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3062 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3063 #if !defined(PETSC_USE_COMPLEX) 3064 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3065 #else 3066 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3067 #endif 3068 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3069 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3070 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3071 j = 0; 3072 while (j < temp_constraints && singular_vals[j] < tol) j++; 3073 total_counts = total_counts-j; 3074 valid_constraints = temp_constraints-j; 3075 /* scale and copy POD basis into used quadrature memory */ 3076 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3077 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3078 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3080 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3082 if (j<temp_constraints) { 3083 PetscInt ii; 3084 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3085 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3086 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3087 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3088 for (k=0;k<temp_constraints-j;k++) { 3089 for (ii=0;ii<size_of_constraint;ii++) { 3090 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3091 } 3092 } 3093 } 3094 #else /* on missing GESVD */ 3095 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3096 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3097 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3098 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3099 #if !defined(PETSC_USE_COMPLEX) 3100 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3101 #else 3102 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3103 #endif 3104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3105 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3106 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3107 k = temp_constraints; 3108 if (k > size_of_constraint) k = size_of_constraint; 3109 j = 0; 3110 while (j < k && singular_vals[k-j-1] < tol) j++; 3111 valid_constraints = k-j; 3112 total_counts = total_counts-temp_constraints+valid_constraints; 3113 #endif /* on missing GESVD */ 3114 } 3115 } 3116 /* update pointers information */ 3117 if (valid_constraints) { 3118 constraints_n[total_counts_cc] = valid_constraints; 3119 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3120 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3121 /* set change_of_basis flag */ 3122 if (boolforchange) { 3123 PetscBTSet(change_basis,total_counts_cc); 3124 } 3125 total_counts_cc++; 3126 } 3127 } 3128 /* free workspace */ 3129 if (!skip_lapack) { 3130 ierr = PetscFree(work);CHKERRQ(ierr); 3131 #if defined(PETSC_USE_COMPLEX) 3132 ierr = PetscFree(rwork);CHKERRQ(ierr); 3133 #endif 3134 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3135 #if defined(PETSC_MISSING_LAPACK_GESVD) 3136 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3137 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3138 #endif 3139 } 3140 for (k=0;k<nnsp_size;k++) { 3141 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3142 } 3143 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3144 /* free index sets of faces, edges and vertices */ 3145 for (i=0;i<n_ISForFaces;i++) { 3146 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3147 } 3148 if (n_ISForFaces) { 3149 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3150 } 3151 for (i=0;i<n_ISForEdges;i++) { 3152 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3153 } 3154 if (n_ISForEdges) { 3155 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3156 } 3157 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3158 } else { 3159 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3160 3161 total_counts = 0; 3162 n_vertices = 0; 3163 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3164 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3165 } 3166 max_constraints = 0; 3167 total_counts_cc = 0; 3168 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3169 total_counts += pcbddc->adaptive_constraints_n[i]; 3170 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3171 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3172 } 3173 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3174 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3175 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3176 constraints_data = pcbddc->adaptive_constraints_data; 3177 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3178 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3179 total_counts_cc = 0; 3180 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3181 if (pcbddc->adaptive_constraints_n[i]) { 3182 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3183 } 3184 } 3185 #if 0 3186 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3187 for (i=0;i<total_counts_cc;i++) { 3188 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3189 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3190 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3191 printf(" %d",constraints_idxs[j]); 3192 } 3193 printf("\n"); 3194 printf("number of cc: %d\n",constraints_n[i]); 3195 } 3196 for (i=0;i<n_vertices;i++) { 3197 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3198 } 3199 for (i=0;i<sub_schurs->n_subs;i++) { 3200 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3201 } 3202 #endif 3203 3204 max_size_of_constraint = 0; 3205 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3206 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3207 /* Change of basis */ 3208 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3209 if (pcbddc->use_change_of_basis) { 3210 for (i=0;i<sub_schurs->n_subs;i++) { 3211 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3212 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3213 } 3214 } 3215 } 3216 } 3217 pcbddc->local_primal_size = total_counts; 3218 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3219 3220 /* map constraints_idxs in boundary numbering */ 3221 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3222 if (i != constraints_idxs_ptr[total_counts_cc]) { 3223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3224 } 3225 3226 /* Create constraint matrix */ 3227 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3228 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3229 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3230 3231 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3232 /* determine if a QR strategy is needed for change of basis */ 3233 qr_needed = PETSC_FALSE; 3234 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3235 total_primal_vertices=0; 3236 pcbddc->local_primal_size_cc = 0; 3237 for (i=0;i<total_counts_cc;i++) { 3238 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3239 if (size_of_constraint == 1) { 3240 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3241 pcbddc->local_primal_size_cc += 1; 3242 } else if (PetscBTLookup(change_basis,i)) { 3243 for (k=0;k<constraints_n[i];k++) { 3244 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3245 } 3246 pcbddc->local_primal_size_cc += constraints_n[i]; 3247 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3248 PetscBTSet(qr_needed_idx,i); 3249 qr_needed = PETSC_TRUE; 3250 } 3251 } else { 3252 pcbddc->local_primal_size_cc += 1; 3253 } 3254 } 3255 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3256 pcbddc->n_vertices = total_primal_vertices; 3257 /* permute indices in order to have a sorted set of vertices */ 3258 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3259 3260 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3261 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3262 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3263 3264 /* nonzero structure of constraint matrix */ 3265 /* and get reference dof for local constraints */ 3266 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3267 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3268 3269 j = total_primal_vertices; 3270 total_counts = total_primal_vertices; 3271 cum = total_primal_vertices; 3272 for (i=n_vertices;i<total_counts_cc;i++) { 3273 if (!PetscBTLookup(change_basis,i)) { 3274 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3275 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3276 cum++; 3277 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3278 for (k=0;k<constraints_n[i];k++) { 3279 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3280 nnz[j+k] = size_of_constraint; 3281 } 3282 j += constraints_n[i]; 3283 } 3284 } 3285 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3286 ierr = PetscFree(nnz);CHKERRQ(ierr); 3287 3288 /* set values in constraint matrix */ 3289 for (i=0;i<total_primal_vertices;i++) { 3290 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3291 } 3292 total_counts = total_primal_vertices; 3293 for (i=n_vertices;i<total_counts_cc;i++) { 3294 if (!PetscBTLookup(change_basis,i)) { 3295 PetscInt *cols; 3296 3297 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3298 cols = constraints_idxs+constraints_idxs_ptr[i]; 3299 for (k=0;k<constraints_n[i];k++) { 3300 PetscInt row = total_counts+k; 3301 PetscScalar *vals; 3302 3303 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3304 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3305 } 3306 total_counts += constraints_n[i]; 3307 } 3308 } 3309 /* assembling */ 3310 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3311 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3312 3313 /* 3314 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3315 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3316 */ 3317 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3318 if (pcbddc->use_change_of_basis) { 3319 /* dual and primal dofs on a single cc */ 3320 PetscInt dual_dofs,primal_dofs; 3321 /* working stuff for GEQRF */ 3322 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3323 PetscBLASInt lqr_work; 3324 /* working stuff for UNGQR */ 3325 PetscScalar *gqr_work,lgqr_work_t; 3326 PetscBLASInt lgqr_work; 3327 /* working stuff for TRTRS */ 3328 PetscScalar *trs_rhs; 3329 PetscBLASInt Blas_NRHS; 3330 /* pointers for values insertion into change of basis matrix */ 3331 PetscInt *start_rows,*start_cols; 3332 PetscScalar *start_vals; 3333 /* working stuff for values insertion */ 3334 PetscBT is_primal; 3335 PetscInt *aux_primal_numbering_B; 3336 /* matrix sizes */ 3337 PetscInt global_size,local_size; 3338 /* temporary change of basis */ 3339 Mat localChangeOfBasisMatrix; 3340 /* extra space for debugging */ 3341 PetscScalar *dbg_work; 3342 3343 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3344 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3345 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3346 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3347 /* nonzeros for local mat */ 3348 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3349 for (i=0;i<pcis->n;i++) nnz[i]=1; 3350 for (i=n_vertices;i<total_counts_cc;i++) { 3351 if (PetscBTLookup(change_basis,i)) { 3352 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3353 if (PetscBTLookup(qr_needed_idx,i)) { 3354 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3355 } else { 3356 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3357 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3358 } 3359 } 3360 } 3361 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3362 ierr = PetscFree(nnz);CHKERRQ(ierr); 3363 /* Set initial identity in the matrix */ 3364 for (i=0;i<pcis->n;i++) { 3365 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3366 } 3367 3368 if (pcbddc->dbg_flag) { 3369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3370 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3371 } 3372 3373 3374 /* Now we loop on the constraints which need a change of basis */ 3375 /* 3376 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3377 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3378 3379 Basic blocks of change of basis matrix T computed by 3380 3381 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3382 3383 | 1 0 ... 0 s_1/S | 3384 | 0 1 ... 0 s_2/S | 3385 | ... | 3386 | 0 ... 1 s_{n-1}/S | 3387 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3388 3389 with S = \sum_{i=1}^n s_i^2 3390 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3391 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3392 3393 - QR decomposition of constraints otherwise 3394 */ 3395 if (qr_needed) { 3396 /* space to store Q */ 3397 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3398 /* first we issue queries for optimal work */ 3399 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3400 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3401 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3402 lqr_work = -1; 3403 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3404 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3405 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3406 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3407 lgqr_work = -1; 3408 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3409 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3410 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3411 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3412 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3413 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3414 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3415 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3416 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3417 /* array to store scaling factors for reflectors */ 3418 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3419 /* array to store rhs and solution of triangular solver */ 3420 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3421 /* allocating workspace for check */ 3422 if (pcbddc->dbg_flag) { 3423 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3424 } 3425 } 3426 /* array to store whether a node is primal or not */ 3427 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3428 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3429 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3430 if (i != total_primal_vertices) { 3431 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3432 } 3433 for (i=0;i<total_primal_vertices;i++) { 3434 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3435 } 3436 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3437 3438 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3439 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3440 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3441 if (PetscBTLookup(change_basis,total_counts)) { 3442 /* get constraint info */ 3443 primal_dofs = constraints_n[total_counts]; 3444 dual_dofs = size_of_constraint-primal_dofs; 3445 3446 if (pcbddc->dbg_flag) { 3447 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3448 } 3449 3450 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3451 3452 /* copy quadrature constraints for change of basis check */ 3453 if (pcbddc->dbg_flag) { 3454 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3455 } 3456 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3457 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3458 3459 /* compute QR decomposition of constraints */ 3460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3461 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3462 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3463 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3464 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3465 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3466 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3467 3468 /* explictly compute R^-T */ 3469 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3470 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3471 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3472 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3474 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3476 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3477 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3478 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3479 3480 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3481 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3483 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3484 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3485 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3486 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3487 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3488 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3489 3490 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3491 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3492 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3493 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3494 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3495 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3496 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3497 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3499 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3500 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3501 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3502 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3503 3504 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3505 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3506 /* insert cols for primal dofs */ 3507 for (j=0;j<primal_dofs;j++) { 3508 start_vals = &qr_basis[j*size_of_constraint]; 3509 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3510 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3511 } 3512 /* insert cols for dual dofs */ 3513 for (j=0,k=0;j<dual_dofs;k++) { 3514 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3515 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3516 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3517 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3518 j++; 3519 } 3520 } 3521 3522 /* check change of basis */ 3523 if (pcbddc->dbg_flag) { 3524 PetscInt ii,jj; 3525 PetscBool valid_qr=PETSC_TRUE; 3526 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3527 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3529 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3531 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3532 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3533 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3534 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3535 for (jj=0;jj<size_of_constraint;jj++) { 3536 for (ii=0;ii<primal_dofs;ii++) { 3537 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3538 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3539 } 3540 } 3541 if (!valid_qr) { 3542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3543 for (jj=0;jj<size_of_constraint;jj++) { 3544 for (ii=0;ii<primal_dofs;ii++) { 3545 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3546 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3547 } 3548 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3549 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3550 } 3551 } 3552 } 3553 } else { 3554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3555 } 3556 } 3557 } else { /* simple transformation block */ 3558 PetscInt row,col; 3559 PetscScalar val,norm; 3560 3561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3562 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3563 for (j=0;j<size_of_constraint;j++) { 3564 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3565 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3566 if (!PetscBTLookup(is_primal,row_B)) { 3567 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3568 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3569 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3570 } else { 3571 for (k=0;k<size_of_constraint;k++) { 3572 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3573 if (row != col) { 3574 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3575 } else { 3576 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3577 } 3578 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3579 } 3580 } 3581 } 3582 if (pcbddc->dbg_flag) { 3583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3584 } 3585 } 3586 } else { 3587 if (pcbddc->dbg_flag) { 3588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3589 } 3590 } 3591 } 3592 3593 /* free workspace */ 3594 if (qr_needed) { 3595 if (pcbddc->dbg_flag) { 3596 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3597 } 3598 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3599 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3600 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3601 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3602 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3603 } 3604 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3605 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3606 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 3608 /* assembling of global change of variable */ 3609 { 3610 Mat tmat; 3611 PetscInt bs; 3612 3613 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3614 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3615 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3616 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3617 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3618 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3619 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3620 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3621 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3622 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3623 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3624 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3625 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3626 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3627 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3628 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3629 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3630 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3631 } 3632 /* check */ 3633 if (pcbddc->dbg_flag) { 3634 PetscReal error; 3635 Vec x,x_change; 3636 3637 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3638 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3639 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3640 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3641 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3642 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3643 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3644 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3645 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3646 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3647 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3648 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3651 ierr = VecDestroy(&x);CHKERRQ(ierr); 3652 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3653 } 3654 3655 /* adapt sub_schurs computed (if any) */ 3656 if (pcbddc->use_deluxe_scaling) { 3657 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3658 if (sub_schurs->S_Ej_all) { 3659 Mat S_new,tmat; 3660 ISLocalToGlobalMapping NtoSall; 3661 IS is_all_N,is_V,is_V_Sall; 3662 const PetscScalar *array; 3663 const PetscInt *idxs_V,*idxs_all; 3664 PetscInt i,n_V; 3665 3666 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3667 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3668 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3669 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3670 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3671 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3672 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3673 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3674 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3675 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3676 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3677 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3678 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3679 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3680 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3681 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3682 for (i=0;i<n_V;i++) { 3683 PetscScalar val; 3684 PetscInt idx; 3685 3686 idx = idxs_V[i]; 3687 val = array[idxs_all[idxs_V[i]]]; 3688 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3689 } 3690 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3691 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 sub_schurs->S_Ej_all = S_new; 3693 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3694 if (sub_schurs->sum_S_Ej_all) { 3695 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3696 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3697 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3698 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3699 sub_schurs->sum_S_Ej_all = S_new; 3700 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3701 } 3702 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3703 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3704 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3705 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3706 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3707 } 3708 } 3709 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3710 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3711 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3712 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3713 } 3714 3715 /* set up change of basis context */ 3716 if (pcbddc->ChangeOfBasisMatrix) { 3717 PCBDDCChange_ctx change_ctx; 3718 3719 if (!pcbddc->new_global_mat) { 3720 PetscInt global_size,local_size; 3721 3722 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3723 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3724 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3725 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3726 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3727 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3728 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3729 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3730 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3731 } else { 3732 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3733 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3734 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3735 } 3736 if (!pcbddc->user_ChangeOfBasisMatrix) { 3737 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3738 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3739 } else { 3740 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3741 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3742 } 3743 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3744 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3745 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3746 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3747 } 3748 3749 /* add pressure dofs to set of primal nodes for numbering purposes */ 3750 for (i=0;i<pcbddc->benign_n;i++) { 3751 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3752 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3753 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3754 pcbddc->local_primal_size_cc++; 3755 pcbddc->local_primal_size++; 3756 } 3757 3758 /* check if a new primal space has been introduced (also take into account benign trick) */ 3759 pcbddc->new_primal_space_local = PETSC_TRUE; 3760 if (olocal_primal_size == pcbddc->local_primal_size) { 3761 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3762 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3763 if (!pcbddc->new_primal_space_local) { 3764 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3765 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3766 } 3767 } 3768 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3769 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3770 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3771 3772 /* flush dbg viewer */ 3773 if (pcbddc->dbg_flag) { 3774 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3775 } 3776 3777 /* free workspace */ 3778 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3779 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3780 if (!pcbddc->adaptive_selection) { 3781 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3782 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3783 } else { 3784 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3785 pcbddc->adaptive_constraints_idxs_ptr, 3786 pcbddc->adaptive_constraints_data_ptr, 3787 pcbddc->adaptive_constraints_idxs, 3788 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3789 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3790 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3791 } 3792 PetscFunctionReturn(0); 3793 } 3794 3795 #undef __FUNCT__ 3796 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3797 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3798 { 3799 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3800 PC_IS *pcis = (PC_IS*)pc->data; 3801 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3802 PetscInt ierr,i,vertex_size,N; 3803 PetscViewer viewer=pcbddc->dbg_viewer; 3804 3805 PetscFunctionBegin; 3806 /* Reset previously computed graph */ 3807 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3808 /* Init local Graph struct */ 3809 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3810 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3811 3812 /* Check validity of the csr graph passed in by the user */ 3813 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3814 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3815 } 3816 3817 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3818 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3819 PetscInt *xadj,*adjncy; 3820 PetscInt nvtxs; 3821 PetscBool flg_row=PETSC_FALSE; 3822 3823 if (pcbddc->use_local_adj) { 3824 3825 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3826 if (flg_row) { 3827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3828 pcbddc->computed_rowadj = PETSC_TRUE; 3829 } 3830 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3831 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3832 IS is_dummy; 3833 ISLocalToGlobalMapping l2gmap_dummy; 3834 PetscInt j,sum; 3835 PetscInt *cxadj,*cadjncy; 3836 const PetscInt *idxs; 3837 PCBDDCGraph graph; 3838 PetscBT is_on_boundary; 3839 3840 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3841 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3842 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3843 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3844 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3845 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3846 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3847 if (flg_row) { 3848 graph->xadj = xadj; 3849 graph->adjncy = adjncy; 3850 } 3851 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3852 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3853 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3854 3855 if (pcbddc->dbg_flag) { 3856 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3857 for (i=0;i<graph->ncc;i++) { 3858 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3859 } 3860 } 3861 3862 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3863 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3864 for (i=0;i<pcis->n_B;i++) { 3865 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3866 } 3867 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3868 3869 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3870 sum = 0; 3871 for (i=0;i<graph->ncc;i++) { 3872 PetscInt sizecc = 0; 3873 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3874 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3875 sizecc++; 3876 } 3877 } 3878 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3879 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3880 cxadj[graph->queue[j]] = sizecc; 3881 } 3882 } 3883 sum += sizecc*sizecc; 3884 } 3885 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3886 sum = 0; 3887 for (i=0;i<pcis->n;i++) { 3888 PetscInt temp = cxadj[i]; 3889 cxadj[i] = sum; 3890 sum += temp; 3891 } 3892 cxadj[pcis->n] = sum; 3893 for (i=0;i<graph->ncc;i++) { 3894 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3895 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3896 PetscInt k,sizecc = 0; 3897 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3898 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3899 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3900 sizecc++; 3901 } 3902 } 3903 } 3904 } 3905 } 3906 if (sum) { 3907 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3908 } else { 3909 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3910 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3911 } 3912 graph->xadj = 0; 3913 graph->adjncy = 0; 3914 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3915 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3916 } 3917 } 3918 if (pcbddc->dbg_flag) { 3919 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3920 } 3921 3922 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3923 vertex_size = 1; 3924 if (pcbddc->user_provided_isfordofs) { 3925 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3926 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3927 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3928 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3929 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3930 } 3931 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3932 pcbddc->n_ISForDofs = 0; 3933 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3934 } 3935 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3936 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3937 } else { 3938 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3939 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3940 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3941 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3942 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3943 } 3944 } 3945 } 3946 3947 /* Setup of Graph */ 3948 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3949 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3950 } 3951 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3952 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3953 } 3954 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 3955 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3956 } 3957 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3958 3959 /* attach info on disconnected subdomains if present */ 3960 if (pcbddc->n_local_subs) { 3961 PetscInt *local_subs; 3962 3963 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3964 for (i=0;i<pcbddc->n_local_subs;i++) { 3965 const PetscInt *idxs; 3966 PetscInt nl,j; 3967 3968 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3969 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3970 for (j=0;j<nl;j++) { 3971 local_subs[idxs[j]] = i; 3972 } 3973 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3974 } 3975 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3976 pcbddc->mat_graph->local_subs = local_subs; 3977 } 3978 3979 /* Graph's connected components analysis */ 3980 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3981 3982 /* print some info to stdout */ 3983 if (pcbddc->dbg_flag) { 3984 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3985 } 3986 3987 /* mark topography has done */ 3988 pcbddc->recompute_topography = PETSC_FALSE; 3989 PetscFunctionReturn(0); 3990 } 3991 3992 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3993 #undef __FUNCT__ 3994 #define __FUNCT__ "PCBDDCSubsetNumbering" 3995 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3996 { 3997 PetscSF sf; 3998 PetscLayout map; 3999 const PetscInt *idxs; 4000 PetscInt *leaf_data,*root_data,*gidxs; 4001 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 4002 PetscInt n_n,nlocals,start,first_index; 4003 PetscMPIInt commsize; 4004 PetscBool first_found; 4005 PetscErrorCode ierr; 4006 4007 PetscFunctionBegin; 4008 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 4009 if (subset_mult) { 4010 PetscCheckSameComm(subset,1,subset_mult,2); 4011 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 4012 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 4013 } 4014 /* create workspace layout for computing global indices of subset */ 4015 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4016 lbounds[0] = lbounds[1] = 0; 4017 for (i=0;i<n;i++) { 4018 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4019 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4020 } 4021 lbounds[0] = -lbounds[0]; 4022 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4023 gbounds[0] = -gbounds[0]; 4024 N = gbounds[1] - gbounds[0] + 1; 4025 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4026 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4027 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4028 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4029 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4030 4031 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4032 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4033 if (subset_mult) { 4034 const PetscInt* idxs_mult; 4035 4036 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4037 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4038 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4039 } else { 4040 for (i=0;i<n;i++) leaf_data[i] = 1; 4041 } 4042 /* local size of new subset */ 4043 n_n = 0; 4044 for (i=0;i<n;i++) n_n += leaf_data[i]; 4045 4046 /* global indexes in layout */ 4047 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4048 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4049 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4050 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4051 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4052 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4053 4054 /* reduce from leaves to roots */ 4055 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4056 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4057 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4058 4059 /* count indexes in local part of layout */ 4060 nlocals = 0; 4061 first_index = -1; 4062 first_found = PETSC_FALSE; 4063 for (i=0;i<Nl;i++) { 4064 if (!first_found && root_data[i]) { 4065 first_found = PETSC_TRUE; 4066 first_index = i; 4067 } 4068 nlocals += root_data[i]; 4069 } 4070 4071 /* cumulative of number of indexes and size of subset without holes */ 4072 #if defined(PETSC_HAVE_MPI_EXSCAN) 4073 start = 0; 4074 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4075 #else 4076 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4077 start = start-nlocals; 4078 #endif 4079 4080 if (N_n) { /* compute total size of new subset if requested */ 4081 *N_n = start + nlocals; 4082 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4083 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4084 } 4085 4086 /* adapt root data with cumulative */ 4087 if (first_found) { 4088 PetscInt old_index; 4089 4090 root_data[first_index] += start; 4091 old_index = first_index; 4092 for (i=first_index+1;i<Nl;i++) { 4093 if (root_data[i]) { 4094 root_data[i] += root_data[old_index]; 4095 old_index = i; 4096 } 4097 } 4098 } 4099 4100 /* from roots to leaves */ 4101 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4102 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4103 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4104 4105 /* create new IS with global indexes without holes */ 4106 if (subset_mult) { 4107 const PetscInt* idxs_mult; 4108 PetscInt cum; 4109 4110 cum = 0; 4111 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4112 for (i=0;i<n;i++) { 4113 PetscInt j; 4114 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4115 } 4116 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4117 } else { 4118 for (i=0;i<n;i++) { 4119 gidxs[i] = leaf_data[i]-1; 4120 } 4121 } 4122 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4123 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4124 PetscFunctionReturn(0); 4125 } 4126 4127 #undef __FUNCT__ 4128 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4129 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4130 { 4131 PetscInt i,j; 4132 PetscScalar *alphas; 4133 PetscErrorCode ierr; 4134 4135 PetscFunctionBegin; 4136 /* this implements stabilized Gram-Schmidt */ 4137 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4138 for (i=0;i<n;i++) { 4139 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4140 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4141 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4142 } 4143 ierr = PetscFree(alphas);CHKERRQ(ierr); 4144 PetscFunctionReturn(0); 4145 } 4146 4147 #undef __FUNCT__ 4148 #define __FUNCT__ "MatISGetSubassemblingPattern" 4149 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4150 { 4151 IS ranks_send_to; 4152 PetscInt n_neighs,*neighs,*n_shared,**shared; 4153 PetscMPIInt size,rank,color; 4154 PetscInt *xadj,*adjncy; 4155 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4156 PetscInt i,local_size,threshold=0; 4157 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4158 PetscSubcomm subcomm; 4159 PetscErrorCode ierr; 4160 4161 PetscFunctionBegin; 4162 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4163 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4164 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4165 4166 /* Get info on mapping */ 4167 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4168 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4169 4170 /* build local CSR graph of subdomains' connectivity */ 4171 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4172 xadj[0] = 0; 4173 xadj[1] = PetscMax(n_neighs-1,0); 4174 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4175 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4176 4177 if (threshold) { 4178 PetscInt xadj_count = 0; 4179 for (i=1;i<n_neighs;i++) { 4180 if (n_shared[i] > threshold) { 4181 adjncy[xadj_count] = neighs[i]; 4182 adjncy_wgt[xadj_count] = n_shared[i]; 4183 xadj_count++; 4184 } 4185 } 4186 xadj[1] = xadj_count; 4187 } else { 4188 if (xadj[1]) { 4189 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4190 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4191 } 4192 } 4193 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4194 if (use_square) { 4195 for (i=0;i<xadj[1];i++) { 4196 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4197 } 4198 } 4199 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4200 4201 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4202 4203 /* 4204 Restrict work on active processes only. 4205 */ 4206 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4207 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4208 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4209 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4210 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4211 if (color) { 4212 ierr = PetscFree(xadj);CHKERRQ(ierr); 4213 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4214 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4215 } else { 4216 Mat subdomain_adj; 4217 IS new_ranks,new_ranks_contig; 4218 MatPartitioning partitioner; 4219 PetscInt prank,rstart=0,rend=0; 4220 PetscInt *is_indices,*oldranks; 4221 PetscBool aggregate; 4222 4223 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4224 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4225 prank = rank; 4226 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4227 /* 4228 for (i=0;i<size;i++) { 4229 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4230 } 4231 */ 4232 for (i=0;i<xadj[1];i++) { 4233 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4234 } 4235 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4236 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4237 if (aggregate) { 4238 PetscInt lrows,row,ncols,*cols; 4239 PetscMPIInt nrank; 4240 PetscScalar *vals; 4241 4242 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4243 lrows = 0; 4244 if (nrank<redprocs) { 4245 lrows = size/redprocs; 4246 if (nrank<size%redprocs) lrows++; 4247 } 4248 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4249 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4250 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4251 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4252 row = nrank; 4253 ncols = xadj[1]-xadj[0]; 4254 cols = adjncy; 4255 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4256 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4257 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4258 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4259 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4260 ierr = PetscFree(xadj);CHKERRQ(ierr); 4261 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4262 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4263 ierr = PetscFree(vals);CHKERRQ(ierr); 4264 } else { 4265 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4266 } 4267 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4268 4269 /* Partition */ 4270 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4271 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4272 if (use_vwgt) { 4273 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4274 v_wgt[0] = local_size; 4275 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4276 } 4277 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4278 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4279 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4280 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4281 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4282 4283 /* renumber new_ranks to avoid "holes" in new set of processors */ 4284 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4285 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4286 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4287 if (!redprocs) { 4288 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4289 } else { 4290 PetscInt idxs[1]; 4291 PetscMPIInt tag; 4292 MPI_Request *reqs; 4293 4294 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4295 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4296 for (i=rstart;i<rend;i++) { 4297 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4298 } 4299 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4300 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4301 ierr = PetscFree(reqs);CHKERRQ(ierr); 4302 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4303 } 4304 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4305 /* clean up */ 4306 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4307 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4308 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4309 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4310 } 4311 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4312 4313 /* assemble parallel IS for sends */ 4314 i = 1; 4315 if (color) i=0; 4316 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4317 /* get back IS */ 4318 *is_sends = ranks_send_to; 4319 PetscFunctionReturn(0); 4320 } 4321 4322 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4323 4324 #undef __FUNCT__ 4325 #define __FUNCT__ "MatISSubassemble" 4326 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4327 { 4328 Mat local_mat; 4329 IS is_sends_internal; 4330 PetscInt rows,cols,new_local_rows; 4331 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4332 PetscBool ismatis,isdense,newisdense,destroy_mat; 4333 ISLocalToGlobalMapping l2gmap; 4334 PetscInt* l2gmap_indices; 4335 const PetscInt* is_indices; 4336 MatType new_local_type; 4337 /* buffers */ 4338 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4339 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4340 PetscInt *recv_buffer_idxs_local; 4341 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4342 /* MPI */ 4343 MPI_Comm comm,comm_n; 4344 PetscSubcomm subcomm; 4345 PetscMPIInt n_sends,n_recvs,commsize; 4346 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4347 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4348 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4349 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4350 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4351 PetscErrorCode ierr; 4352 4353 PetscFunctionBegin; 4354 /* TODO: add missing checks */ 4355 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4356 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4357 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4358 PetscValidLogicalCollectiveInt(mat,nis,7); 4359 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4360 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4361 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4362 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4363 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4364 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4365 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4366 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4367 PetscInt mrows,mcols,mnrows,mncols; 4368 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4369 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4370 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4371 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4372 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4373 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4374 } 4375 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4376 PetscValidLogicalCollectiveInt(mat,bs,0); 4377 /* prepare IS for sending if not provided */ 4378 if (!is_sends) { 4379 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4380 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4381 } else { 4382 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4383 is_sends_internal = is_sends; 4384 } 4385 4386 /* get comm */ 4387 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4388 4389 /* compute number of sends */ 4390 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4391 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4392 4393 /* compute number of receives */ 4394 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4395 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4396 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4397 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4398 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4399 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4400 ierr = PetscFree(iflags);CHKERRQ(ierr); 4401 4402 /* restrict comm if requested */ 4403 subcomm = 0; 4404 destroy_mat = PETSC_FALSE; 4405 if (restrict_comm) { 4406 PetscMPIInt color,subcommsize; 4407 4408 color = 0; 4409 if (restrict_full) { 4410 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4411 } else { 4412 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4413 } 4414 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4415 subcommsize = commsize - subcommsize; 4416 /* check if reuse has been requested */ 4417 if (reuse == MAT_REUSE_MATRIX) { 4418 if (*mat_n) { 4419 PetscMPIInt subcommsize2; 4420 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4421 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4422 comm_n = PetscObjectComm((PetscObject)*mat_n); 4423 } else { 4424 comm_n = PETSC_COMM_SELF; 4425 } 4426 } else { /* MAT_INITIAL_MATRIX */ 4427 PetscMPIInt rank; 4428 4429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4430 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4431 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4432 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4433 comm_n = PetscSubcommChild(subcomm); 4434 } 4435 /* flag to destroy *mat_n if not significative */ 4436 if (color) destroy_mat = PETSC_TRUE; 4437 } else { 4438 comm_n = comm; 4439 } 4440 4441 /* prepare send/receive buffers */ 4442 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4443 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4444 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4445 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4446 if (nis) { 4447 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4448 } 4449 4450 /* Get data from local matrices */ 4451 if (!isdense) { 4452 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4453 /* TODO: See below some guidelines on how to prepare the local buffers */ 4454 /* 4455 send_buffer_vals should contain the raw values of the local matrix 4456 send_buffer_idxs should contain: 4457 - MatType_PRIVATE type 4458 - PetscInt size_of_l2gmap 4459 - PetscInt global_row_indices[size_of_l2gmap] 4460 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4461 */ 4462 } else { 4463 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4464 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4465 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4466 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4467 send_buffer_idxs[1] = i; 4468 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4469 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4470 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4471 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4472 for (i=0;i<n_sends;i++) { 4473 ilengths_vals[is_indices[i]] = len*len; 4474 ilengths_idxs[is_indices[i]] = len+2; 4475 } 4476 } 4477 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4478 /* additional is (if any) */ 4479 if (nis) { 4480 PetscMPIInt psum; 4481 PetscInt j; 4482 for (j=0,psum=0;j<nis;j++) { 4483 PetscInt plen; 4484 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4485 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4486 psum += len+1; /* indices + lenght */ 4487 } 4488 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4489 for (j=0,psum=0;j<nis;j++) { 4490 PetscInt plen; 4491 const PetscInt *is_array_idxs; 4492 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4493 send_buffer_idxs_is[psum] = plen; 4494 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4495 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4496 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4497 psum += plen+1; /* indices + lenght */ 4498 } 4499 for (i=0;i<n_sends;i++) { 4500 ilengths_idxs_is[is_indices[i]] = psum; 4501 } 4502 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4503 } 4504 4505 buf_size_idxs = 0; 4506 buf_size_vals = 0; 4507 buf_size_idxs_is = 0; 4508 for (i=0;i<n_recvs;i++) { 4509 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4510 buf_size_vals += (PetscInt)olengths_vals[i]; 4511 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4512 } 4513 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4514 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4515 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4516 4517 /* get new tags for clean communications */ 4518 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4519 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4520 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4521 4522 /* allocate for requests */ 4523 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4524 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4525 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4526 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4527 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4528 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4529 4530 /* communications */ 4531 ptr_idxs = recv_buffer_idxs; 4532 ptr_vals = recv_buffer_vals; 4533 ptr_idxs_is = recv_buffer_idxs_is; 4534 for (i=0;i<n_recvs;i++) { 4535 source_dest = onodes[i]; 4536 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4537 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4538 ptr_idxs += olengths_idxs[i]; 4539 ptr_vals += olengths_vals[i]; 4540 if (nis) { 4541 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4542 ptr_idxs_is += olengths_idxs_is[i]; 4543 } 4544 } 4545 for (i=0;i<n_sends;i++) { 4546 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4547 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4548 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4549 if (nis) { 4550 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4551 } 4552 } 4553 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4554 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4555 4556 /* assemble new l2g map */ 4557 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4558 ptr_idxs = recv_buffer_idxs; 4559 new_local_rows = 0; 4560 for (i=0;i<n_recvs;i++) { 4561 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4562 ptr_idxs += olengths_idxs[i]; 4563 } 4564 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4565 ptr_idxs = recv_buffer_idxs; 4566 new_local_rows = 0; 4567 for (i=0;i<n_recvs;i++) { 4568 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4569 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4570 ptr_idxs += olengths_idxs[i]; 4571 } 4572 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4573 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4574 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4575 4576 /* infer new local matrix type from received local matrices type */ 4577 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4578 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4579 if (n_recvs) { 4580 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4581 ptr_idxs = recv_buffer_idxs; 4582 for (i=0;i<n_recvs;i++) { 4583 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4584 new_local_type_private = MATAIJ_PRIVATE; 4585 break; 4586 } 4587 ptr_idxs += olengths_idxs[i]; 4588 } 4589 switch (new_local_type_private) { 4590 case MATDENSE_PRIVATE: 4591 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4592 new_local_type = MATSEQAIJ; 4593 bs = 1; 4594 } else { /* if I receive only 1 dense matrix */ 4595 new_local_type = MATSEQDENSE; 4596 bs = 1; 4597 } 4598 break; 4599 case MATAIJ_PRIVATE: 4600 new_local_type = MATSEQAIJ; 4601 bs = 1; 4602 break; 4603 case MATBAIJ_PRIVATE: 4604 new_local_type = MATSEQBAIJ; 4605 break; 4606 case MATSBAIJ_PRIVATE: 4607 new_local_type = MATSEQSBAIJ; 4608 break; 4609 default: 4610 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4611 break; 4612 } 4613 } else { /* by default, new_local_type is seqdense */ 4614 new_local_type = MATSEQDENSE; 4615 bs = 1; 4616 } 4617 4618 /* create MATIS object if needed */ 4619 if (reuse == MAT_INITIAL_MATRIX) { 4620 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4621 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4622 } else { 4623 /* it also destroys the local matrices */ 4624 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4625 } 4626 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4627 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4628 4629 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4630 4631 /* Global to local map of received indices */ 4632 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4633 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4634 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4635 4636 /* restore attributes -> type of incoming data and its size */ 4637 buf_size_idxs = 0; 4638 for (i=0;i<n_recvs;i++) { 4639 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4640 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4641 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4642 } 4643 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4644 4645 /* set preallocation */ 4646 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4647 if (!newisdense) { 4648 PetscInt *new_local_nnz=0; 4649 4650 ptr_vals = recv_buffer_vals; 4651 ptr_idxs = recv_buffer_idxs_local; 4652 if (n_recvs) { 4653 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4654 } 4655 for (i=0;i<n_recvs;i++) { 4656 PetscInt j; 4657 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4658 for (j=0;j<*(ptr_idxs+1);j++) { 4659 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4660 } 4661 } else { 4662 /* TODO */ 4663 } 4664 ptr_idxs += olengths_idxs[i]; 4665 } 4666 if (new_local_nnz) { 4667 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4668 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4669 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4670 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4671 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4672 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4673 } else { 4674 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4675 } 4676 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4677 } else { 4678 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4679 } 4680 4681 /* set values */ 4682 ptr_vals = recv_buffer_vals; 4683 ptr_idxs = recv_buffer_idxs_local; 4684 for (i=0;i<n_recvs;i++) { 4685 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4686 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4687 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4688 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4689 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4690 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4691 } else { 4692 /* TODO */ 4693 } 4694 ptr_idxs += olengths_idxs[i]; 4695 ptr_vals += olengths_vals[i]; 4696 } 4697 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4698 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4699 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4700 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4701 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4702 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4703 4704 #if 0 4705 if (!restrict_comm) { /* check */ 4706 Vec lvec,rvec; 4707 PetscReal infty_error; 4708 4709 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4710 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4711 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4712 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4713 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4714 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4715 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4716 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4717 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4718 } 4719 #endif 4720 4721 /* assemble new additional is (if any) */ 4722 if (nis) { 4723 PetscInt **temp_idxs,*count_is,j,psum; 4724 4725 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4726 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4727 ptr_idxs = recv_buffer_idxs_is; 4728 psum = 0; 4729 for (i=0;i<n_recvs;i++) { 4730 for (j=0;j<nis;j++) { 4731 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4732 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4733 psum += plen; 4734 ptr_idxs += plen+1; /* shift pointer to received data */ 4735 } 4736 } 4737 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4738 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4739 for (i=1;i<nis;i++) { 4740 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4741 } 4742 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4743 ptr_idxs = recv_buffer_idxs_is; 4744 for (i=0;i<n_recvs;i++) { 4745 for (j=0;j<nis;j++) { 4746 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4747 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4748 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4749 ptr_idxs += plen+1; /* shift pointer to received data */ 4750 } 4751 } 4752 for (i=0;i<nis;i++) { 4753 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4754 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4755 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4756 } 4757 ierr = PetscFree(count_is);CHKERRQ(ierr); 4758 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4759 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4760 } 4761 /* free workspace */ 4762 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4763 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4764 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4765 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4766 if (isdense) { 4767 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4768 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4769 } else { 4770 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4771 } 4772 if (nis) { 4773 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4774 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4775 } 4776 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4777 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4778 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4779 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4780 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4781 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4782 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4783 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4784 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4785 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4786 ierr = PetscFree(onodes);CHKERRQ(ierr); 4787 if (nis) { 4788 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4789 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4790 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4791 } 4792 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4793 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4794 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4795 for (i=0;i<nis;i++) { 4796 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4797 } 4798 *mat_n = NULL; 4799 } 4800 PetscFunctionReturn(0); 4801 } 4802 4803 /* temporary hack into ksp private data structure */ 4804 #include <petsc/private/kspimpl.h> 4805 4806 #undef __FUNCT__ 4807 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4808 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4809 { 4810 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4811 PC_IS *pcis = (PC_IS*)pc->data; 4812 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4813 MatNullSpace CoarseNullSpace=NULL; 4814 ISLocalToGlobalMapping coarse_islg; 4815 IS coarse_is,*isarray; 4816 PetscInt i,im_active=-1,active_procs=-1; 4817 PetscInt nis,nisdofs,nisneu,nisvert; 4818 PC pc_temp; 4819 PCType coarse_pc_type; 4820 KSPType coarse_ksp_type; 4821 PetscBool multilevel_requested,multilevel_allowed; 4822 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4823 Mat t_coarse_mat_is; 4824 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4825 PetscMPIInt all_procs; 4826 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4827 PetscBool compute_vecs = PETSC_FALSE; 4828 PetscScalar *array; 4829 PetscErrorCode ierr; 4830 4831 PetscFunctionBegin; 4832 /* Assign global numbering to coarse dofs */ 4833 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4834 PetscInt ocoarse_size; 4835 compute_vecs = PETSC_TRUE; 4836 ocoarse_size = pcbddc->coarse_size; 4837 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4838 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4839 /* see if we can avoid some work */ 4840 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4841 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4842 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4843 PC pc; 4844 PetscBool isbddc; 4845 4846 /* temporary workaround since PCBDDC does not have a reset method so far */ 4847 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4848 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4849 if (isbddc) { 4850 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4851 } 4852 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4853 coarse_reuse = PETSC_FALSE; 4854 } else { /* we can safely reuse already computed coarse matrix */ 4855 coarse_reuse = PETSC_TRUE; 4856 } 4857 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4858 coarse_reuse = PETSC_FALSE; 4859 } 4860 /* reset any subassembling information */ 4861 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4862 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4863 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4864 coarse_reuse = PETSC_TRUE; 4865 } 4866 4867 /* count "active" (i.e. with positive local size) and "void" processes */ 4868 im_active = !!(pcis->n); 4869 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4870 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4871 void_procs = all_procs-active_procs; 4872 csin_type_simple = PETSC_TRUE; 4873 redist = PETSC_FALSE; 4874 if (pcbddc->current_level && void_procs) { 4875 csin_ml = PETSC_TRUE; 4876 ncoarse_ml = void_procs; 4877 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4878 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4879 csin_ds = PETSC_TRUE; 4880 ncoarse_ds = pcbddc->redistribute_coarse; 4881 redist = PETSC_TRUE; 4882 } else { 4883 csin_ds = PETSC_TRUE; 4884 ncoarse_ds = active_procs; 4885 redist = PETSC_TRUE; 4886 } 4887 } else { 4888 csin_ml = PETSC_FALSE; 4889 ncoarse_ml = all_procs; 4890 if (void_procs) { 4891 csin_ds = PETSC_TRUE; 4892 ncoarse_ds = void_procs; 4893 csin_type_simple = PETSC_FALSE; 4894 } else { 4895 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4896 csin_ds = PETSC_TRUE; 4897 ncoarse_ds = pcbddc->redistribute_coarse; 4898 redist = PETSC_TRUE; 4899 } else { 4900 csin_ds = PETSC_FALSE; 4901 ncoarse_ds = all_procs; 4902 } 4903 } 4904 } 4905 4906 /* 4907 test if we can go multilevel: three conditions must be satisfied: 4908 - we have not exceeded the number of levels requested 4909 - we can actually subassemble the active processes 4910 - we can find a suitable number of MPI processes where we can place the subassembled problem 4911 */ 4912 multilevel_allowed = PETSC_FALSE; 4913 multilevel_requested = PETSC_FALSE; 4914 if (pcbddc->current_level < pcbddc->max_levels) { 4915 multilevel_requested = PETSC_TRUE; 4916 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4917 multilevel_allowed = PETSC_FALSE; 4918 } else { 4919 multilevel_allowed = PETSC_TRUE; 4920 } 4921 } 4922 /* determine number of process partecipating to coarse solver */ 4923 if (multilevel_allowed) { 4924 ncoarse = ncoarse_ml; 4925 csin = csin_ml; 4926 redist = PETSC_FALSE; 4927 } else { 4928 ncoarse = ncoarse_ds; 4929 csin = csin_ds; 4930 } 4931 4932 /* creates temporary l2gmap and IS for coarse indexes */ 4933 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4934 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4935 4936 /* creates temporary MATIS object for coarse matrix */ 4937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4938 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4939 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4940 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4941 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4942 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4943 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4944 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4945 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4946 4947 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4948 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 4949 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4950 const PetscInt *idxs; 4951 ISLocalToGlobalMapping tmap; 4952 4953 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4954 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4955 /* allocate space for temporary storage */ 4956 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4957 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4958 /* allocate for IS array */ 4959 nisdofs = pcbddc->n_ISForDofsLocal; 4960 nisneu = !!pcbddc->NeumannBoundariesLocal; 4961 nisvert = 0; 4962 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4963 nisvert = 1; 4964 } 4965 nis = nisdofs + nisneu + nisvert; 4966 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4967 /* dofs splitting */ 4968 for (i=0;i<nisdofs;i++) { 4969 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4970 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4971 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4972 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4973 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4974 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4975 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4976 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4977 } 4978 /* neumann boundaries */ 4979 if (pcbddc->NeumannBoundariesLocal) { 4980 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4981 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4982 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4983 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4984 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4985 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4986 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4987 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4988 } 4989 /* primal vertices (benign) */ 4990 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4991 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 4992 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4993 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4994 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4995 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4996 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 4997 } 4998 /* free memory */ 4999 ierr = PetscFree(tidxs);CHKERRQ(ierr); 5000 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 5001 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 5002 } else { 5003 nis = 0; 5004 nisdofs = 0; 5005 nisneu = 0; 5006 nisvert = 0; 5007 isarray = NULL; 5008 } 5009 /* destroy no longer needed map */ 5010 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 5011 5012 /* restrict on coarse candidates (if needed) */ 5013 coarse_mat_is = NULL; 5014 if (csin) { 5015 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5016 if (redist) { 5017 PetscMPIInt rank; 5018 PetscInt spc,n_spc_p1,dest[1],destsize; 5019 5020 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5021 spc = active_procs/ncoarse; 5022 n_spc_p1 = active_procs%ncoarse; 5023 if (im_active) { 5024 destsize = 1; 5025 if (rank > n_spc_p1*(spc+1)-1) { 5026 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5027 } else { 5028 dest[0] = rank/(spc+1); 5029 } 5030 } else { 5031 destsize = 0; 5032 } 5033 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5034 } else if (csin_type_simple) { 5035 PetscMPIInt rank; 5036 PetscInt issize,isidx; 5037 5038 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5039 if (im_active) { 5040 issize = 1; 5041 isidx = (PetscInt)rank; 5042 } else { 5043 issize = 0; 5044 isidx = -1; 5045 } 5046 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5047 } else { /* get a suitable subassembling pattern from MATIS code */ 5048 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5049 } 5050 5051 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5052 if (!redist || ncoarse <= void_procs) { 5053 PetscInt ncoarse_cand,tissize,*nisindices; 5054 PetscInt *coarse_candidates; 5055 const PetscInt* tisindices; 5056 5057 /* get coarse candidates' ranks in pc communicator */ 5058 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5059 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5060 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5061 if (!coarse_candidates[i]) { 5062 coarse_candidates[ncoarse_cand++]=i; 5063 } 5064 } 5065 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5066 5067 5068 if (pcbddc->dbg_flag) { 5069 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5070 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5071 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5072 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5073 for (i=0;i<ncoarse_cand;i++) { 5074 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5075 } 5076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5077 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5078 } 5079 /* shift the pattern on coarse candidates */ 5080 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5081 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5082 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5083 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5084 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5085 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5086 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5087 } 5088 if (pcbddc->dbg_flag) { 5089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5091 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5093 } 5094 } 5095 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5096 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5097 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5098 } else { /* this is the last level, so use just receiving processes in subcomm */ 5099 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5100 } 5101 } else { 5102 if (pcbddc->dbg_flag) { 5103 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5104 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5106 } 5107 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5108 coarse_mat_is = t_coarse_mat_is; 5109 } 5110 5111 /* create local to global scatters for coarse problem */ 5112 if (compute_vecs) { 5113 PetscInt lrows; 5114 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5115 if (coarse_mat_is) { 5116 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5117 } else { 5118 lrows = 0; 5119 } 5120 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5121 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5122 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5123 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5124 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5125 } 5126 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5127 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5128 5129 /* set defaults for coarse KSP and PC */ 5130 if (multilevel_allowed) { 5131 coarse_ksp_type = KSPRICHARDSON; 5132 coarse_pc_type = PCBDDC; 5133 } else { 5134 coarse_ksp_type = KSPPREONLY; 5135 coarse_pc_type = PCREDUNDANT; 5136 } 5137 5138 /* print some info if requested */ 5139 if (pcbddc->dbg_flag) { 5140 if (!multilevel_allowed) { 5141 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5142 if (multilevel_requested) { 5143 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5144 } else if (pcbddc->max_levels) { 5145 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5146 } 5147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5148 } 5149 } 5150 5151 /* create the coarse KSP object only once with defaults */ 5152 if (coarse_mat_is) { 5153 MatReuse coarse_mat_reuse; 5154 PetscViewer dbg_viewer = NULL; 5155 if (pcbddc->dbg_flag) { 5156 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5157 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5158 } 5159 if (!pcbddc->coarse_ksp) { 5160 char prefix[256],str_level[16]; 5161 size_t len; 5162 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5163 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5164 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5165 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5166 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5167 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5168 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5169 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5170 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5171 /* prefix */ 5172 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5173 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5174 if (!pcbddc->current_level) { 5175 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5176 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5177 } else { 5178 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5179 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5180 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5181 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5182 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5183 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5184 } 5185 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5186 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5187 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5188 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5189 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5190 /* allow user customization */ 5191 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5192 } 5193 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5194 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5195 if (nisdofs) { 5196 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5197 for (i=0;i<nisdofs;i++) { 5198 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5199 } 5200 } 5201 if (nisneu) { 5202 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5203 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5204 } 5205 if (nisvert) { 5206 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5207 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5208 } 5209 5210 /* get some info after set from options */ 5211 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5212 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5213 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5214 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5215 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5216 isbddc = PETSC_FALSE; 5217 } 5218 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5219 if (isredundant) { 5220 KSP inner_ksp; 5221 PC inner_pc; 5222 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5223 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5224 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5225 } 5226 5227 /* assemble coarse matrix */ 5228 if (coarse_reuse) { 5229 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5230 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5231 coarse_mat_reuse = MAT_REUSE_MATRIX; 5232 } else { 5233 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5234 } 5235 if (isbddc || isnn) { 5236 if (pcbddc->coarsening_ratio > 1) { 5237 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5238 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5239 if (pcbddc->dbg_flag) { 5240 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5241 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5242 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5243 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5244 } 5245 } 5246 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5247 } else { 5248 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5249 coarse_mat = coarse_mat_is; 5250 } 5251 } else { 5252 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5253 } 5254 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5255 5256 /* propagate symmetry info of coarse matrix */ 5257 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5258 if (pc->pmat->symmetric_set) { 5259 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5260 } 5261 if (pc->pmat->hermitian_set) { 5262 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5263 } 5264 if (pc->pmat->spd_set) { 5265 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5266 } 5267 /* set operators */ 5268 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5269 if (pcbddc->dbg_flag) { 5270 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5271 } 5272 } else { /* processes non partecipating to coarse solver (if any) */ 5273 coarse_mat = 0; 5274 } 5275 ierr = PetscFree(isarray);CHKERRQ(ierr); 5276 #if 0 5277 { 5278 PetscViewer viewer; 5279 char filename[256]; 5280 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5281 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5282 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5283 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5284 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5285 } 5286 #endif 5287 5288 /* Compute coarse null space (special handling by BDDC only) */ 5289 #if 0 5290 if (pcbddc->NullSpace) { 5291 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5292 } 5293 #endif 5294 /* hack */ 5295 if (pcbddc->coarse_ksp) { 5296 Vec crhs,csol; 5297 5298 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5299 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5300 if (!csol) { 5301 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5302 } 5303 if (!crhs) { 5304 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5305 } 5306 } 5307 5308 /* compute null space for coarse solver if the benign trick has been requested */ 5309 if (pcbddc->benign_null) { 5310 5311 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5312 for (i=0;i<pcbddc->benign_n;i++) { 5313 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5314 } 5315 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5316 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5317 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5318 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5319 if (coarse_mat) { 5320 Vec nullv; 5321 PetscScalar *array,*array2; 5322 PetscInt nl; 5323 5324 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5325 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5326 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5327 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5328 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5329 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5330 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5331 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5332 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5333 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5334 } 5335 } 5336 5337 if (pcbddc->coarse_ksp) { 5338 PetscBool ispreonly; 5339 5340 if (CoarseNullSpace) { 5341 PetscBool isnull; 5342 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5343 if (isnull) { 5344 if (isbddc && !pcbddc->benign_saddle_point) { 5345 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5346 } else { 5347 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5348 } 5349 } else { 5350 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5351 } 5352 } 5353 /* setup coarse ksp */ 5354 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5355 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5356 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5357 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5358 KSP check_ksp; 5359 KSPType check_ksp_type; 5360 PC check_pc; 5361 Vec check_vec,coarse_vec; 5362 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5363 PetscInt its; 5364 PetscBool compute_eigs; 5365 PetscReal *eigs_r,*eigs_c; 5366 PetscInt neigs; 5367 const char *prefix; 5368 5369 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5370 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5371 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5372 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5373 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5374 if (ispreonly) { 5375 check_ksp_type = KSPPREONLY; 5376 compute_eigs = PETSC_FALSE; 5377 } else { 5378 check_ksp_type = KSPGMRES; 5379 compute_eigs = PETSC_TRUE; 5380 } 5381 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5382 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5383 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5384 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5385 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5386 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5387 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5388 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5389 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5390 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5391 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5392 /* create random vec */ 5393 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5394 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5395 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5396 if (CoarseNullSpace) { 5397 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5398 } 5399 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5400 /* solve coarse problem */ 5401 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5402 if (CoarseNullSpace) { 5403 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5404 } 5405 /* set eigenvalue estimation if preonly has not been requested */ 5406 if (compute_eigs) { 5407 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5408 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5409 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5410 lambda_max = eigs_r[neigs-1]; 5411 lambda_min = eigs_r[0]; 5412 if (pcbddc->use_coarse_estimates) { 5413 if (lambda_max>lambda_min) { 5414 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5415 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5416 } 5417 } 5418 } 5419 5420 /* check coarse problem residual error */ 5421 if (pcbddc->dbg_flag) { 5422 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5423 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5424 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5425 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5426 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5427 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5428 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5429 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5430 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5431 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5432 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5433 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5434 if (CoarseNullSpace) { 5435 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5436 } 5437 if (compute_eigs) { 5438 PetscReal lambda_max_s,lambda_min_s; 5439 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5440 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5441 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5442 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5443 for (i=0;i<neigs;i++) { 5444 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5445 } 5446 } 5447 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5448 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5449 } 5450 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5451 if (compute_eigs) { 5452 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5453 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5454 } 5455 } 5456 } 5457 /* print additional info */ 5458 if (pcbddc->dbg_flag) { 5459 /* waits until all processes reaches this point */ 5460 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5461 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5462 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5463 } 5464 5465 /* free memory */ 5466 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5467 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5468 PetscFunctionReturn(0); 5469 } 5470 5471 #undef __FUNCT__ 5472 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5473 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5474 { 5475 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5476 PC_IS* pcis = (PC_IS*)pc->data; 5477 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5478 IS subset,subset_mult,subset_n; 5479 PetscInt local_size,coarse_size=0; 5480 PetscInt *local_primal_indices=NULL; 5481 const PetscInt *t_local_primal_indices; 5482 PetscErrorCode ierr; 5483 5484 PetscFunctionBegin; 5485 /* Compute global number of coarse dofs */ 5486 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5487 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5488 } 5489 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5490 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5491 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5492 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5493 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5494 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5495 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5496 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5497 if (local_size != pcbddc->local_primal_size) { 5498 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5499 } 5500 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5501 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5502 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5503 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5504 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5505 5506 /* check numbering */ 5507 if (pcbddc->dbg_flag) { 5508 PetscScalar coarsesum,*array,*array2; 5509 PetscInt i; 5510 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5511 5512 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5513 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5514 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5515 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5516 /* counter */ 5517 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5518 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5519 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5520 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5521 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5522 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5523 5524 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5525 for (i=0;i<pcbddc->local_primal_size;i++) { 5526 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5527 } 5528 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5529 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5530 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5531 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5532 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5533 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5534 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5535 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5536 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5537 for (i=0;i<pcis->n;i++) { 5538 if (array[i] != 0.0 && array[i] != array2[i]) { 5539 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5540 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5541 set_error = PETSC_TRUE; 5542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5543 } 5544 } 5545 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5546 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5548 for (i=0;i<pcis->n;i++) { 5549 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5550 } 5551 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5552 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5553 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5554 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5555 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5556 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5557 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5558 PetscInt *gidxs; 5559 5560 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5561 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5562 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5563 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5565 for (i=0;i<pcbddc->local_primal_size;i++) { 5566 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5567 } 5568 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5569 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5570 } 5571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5572 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5573 } 5574 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5575 /* get back data */ 5576 *coarse_size_n = coarse_size; 5577 *local_primal_indices_n = local_primal_indices; 5578 PetscFunctionReturn(0); 5579 } 5580 5581 #undef __FUNCT__ 5582 #define __FUNCT__ "PCBDDCGlobalToLocal" 5583 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5584 { 5585 IS localis_t; 5586 PetscInt i,lsize,*idxs,n; 5587 PetscScalar *vals; 5588 PetscErrorCode ierr; 5589 5590 PetscFunctionBegin; 5591 /* get indices in local ordering exploiting local to global map */ 5592 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5593 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5594 for (i=0;i<lsize;i++) vals[i] = 1.0; 5595 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5596 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5597 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5598 if (idxs) { /* multilevel guard */ 5599 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5600 } 5601 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5602 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5603 ierr = PetscFree(vals);CHKERRQ(ierr); 5604 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5605 /* now compute set in local ordering */ 5606 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5607 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5608 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5609 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5610 for (i=0,lsize=0;i<n;i++) { 5611 if (PetscRealPart(vals[i]) > 0.5) { 5612 lsize++; 5613 } 5614 } 5615 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5616 for (i=0,lsize=0;i<n;i++) { 5617 if (PetscRealPart(vals[i]) > 0.5) { 5618 idxs[lsize++] = i; 5619 } 5620 } 5621 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5622 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5623 *localis = localis_t; 5624 PetscFunctionReturn(0); 5625 } 5626 5627 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5628 #undef __FUNCT__ 5629 #define __FUNCT__ "PCBDDCMatMult_Private" 5630 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5631 { 5632 PCBDDCChange_ctx change_ctx; 5633 PetscErrorCode ierr; 5634 5635 PetscFunctionBegin; 5636 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5637 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5638 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5639 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5640 PetscFunctionReturn(0); 5641 } 5642 5643 #undef __FUNCT__ 5644 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5645 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5646 { 5647 PCBDDCChange_ctx change_ctx; 5648 PetscErrorCode ierr; 5649 5650 PetscFunctionBegin; 5651 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5652 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5653 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5654 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5655 PetscFunctionReturn(0); 5656 } 5657 5658 #undef __FUNCT__ 5659 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5660 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5661 { 5662 PC_IS *pcis=(PC_IS*)pc->data; 5663 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5664 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5665 Mat S_j; 5666 PetscInt *used_xadj,*used_adjncy; 5667 PetscBool free_used_adj; 5668 PetscErrorCode ierr; 5669 5670 PetscFunctionBegin; 5671 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5672 free_used_adj = PETSC_FALSE; 5673 if (pcbddc->sub_schurs_layers == -1) { 5674 used_xadj = NULL; 5675 used_adjncy = NULL; 5676 } else { 5677 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5678 used_xadj = pcbddc->mat_graph->xadj; 5679 used_adjncy = pcbddc->mat_graph->adjncy; 5680 } else if (pcbddc->computed_rowadj) { 5681 used_xadj = pcbddc->mat_graph->xadj; 5682 used_adjncy = pcbddc->mat_graph->adjncy; 5683 } else { 5684 PetscBool flg_row=PETSC_FALSE; 5685 const PetscInt *xadj,*adjncy; 5686 PetscInt nvtxs; 5687 5688 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5689 if (flg_row) { 5690 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5691 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5692 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5693 free_used_adj = PETSC_TRUE; 5694 } else { 5695 pcbddc->sub_schurs_layers = -1; 5696 used_xadj = NULL; 5697 used_adjncy = NULL; 5698 } 5699 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5700 } 5701 } 5702 5703 /* setup sub_schurs data */ 5704 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5705 if (!sub_schurs->use_mumps) { 5706 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5707 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5708 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5709 } else { 5710 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5711 PetscBool isseqaij; 5712 if (!pcbddc->use_vertices && reuse_solvers) { 5713 PetscInt n_vertices; 5714 5715 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5716 reuse_solvers = (PetscBool)!n_vertices; 5717 } 5718 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5719 if (!isseqaij) { 5720 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5721 if (matis->A == pcbddc->local_mat) { 5722 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5723 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5724 } else { 5725 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5726 } 5727 } 5728 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5729 } 5730 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5731 5732 /* free adjacency */ 5733 if (free_used_adj) { 5734 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5735 } 5736 PetscFunctionReturn(0); 5737 } 5738 5739 #undef __FUNCT__ 5740 #define __FUNCT__ "PCBDDCInitSubSchurs" 5741 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5742 { 5743 PC_IS *pcis=(PC_IS*)pc->data; 5744 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5745 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5746 PCBDDCGraph graph; 5747 PetscErrorCode ierr; 5748 5749 PetscFunctionBegin; 5750 /* attach interface graph for determining subsets */ 5751 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5752 IS verticesIS,verticescomm; 5753 PetscInt vsize,*idxs; 5754 5755 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5756 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5757 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5758 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5759 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5760 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5761 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5762 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5763 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5764 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5765 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5766 /* 5767 if (pcbddc->dbg_flag) { 5768 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5769 } 5770 */ 5771 } else { 5772 graph = pcbddc->mat_graph; 5773 } 5774 5775 /* sub_schurs init */ 5776 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5777 5778 /* free graph struct */ 5779 if (pcbddc->sub_schurs_rebuild) { 5780 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5781 } 5782 PetscFunctionReturn(0); 5783 } 5784 5785 #undef __FUNCT__ 5786 #define __FUNCT__ "PCBDDCCheckOperator" 5787 PetscErrorCode PCBDDCCheckOperator(PC pc) 5788 { 5789 PC_IS *pcis=(PC_IS*)pc->data; 5790 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5791 PetscErrorCode ierr; 5792 5793 PetscFunctionBegin; 5794 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5795 IS zerodiag = NULL; 5796 Mat S_j,B0_B=NULL; 5797 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5798 PetscScalar *p0_check,*array,*array2; 5799 PetscReal norm; 5800 PetscInt i; 5801 5802 /* B0 and B0_B */ 5803 if (zerodiag) { 5804 IS dummy; 5805 5806 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5807 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5808 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5809 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5810 } 5811 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5812 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5813 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5814 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5815 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5816 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5817 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5818 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5819 /* S_j */ 5820 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5821 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5822 5823 /* mimic vector in \widetilde{W}_\Gamma */ 5824 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5825 /* continuous in primal space */ 5826 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5827 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5828 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5829 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5830 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5831 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5832 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5833 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5834 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5835 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5836 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5837 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5838 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5839 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5840 5841 /* assemble rhs for coarse problem */ 5842 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5843 /* local with Schur */ 5844 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5845 if (zerodiag) { 5846 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5847 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5848 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5849 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5850 } 5851 /* sum on primal nodes the local contributions */ 5852 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5853 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5854 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5855 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5856 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5857 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5858 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5859 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5860 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5861 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5862 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5863 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5864 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5865 /* scale primal nodes (BDDC sums contibutions) */ 5866 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5867 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5868 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5869 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5870 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5871 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5872 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5873 /* global: \widetilde{B0}_B w_\Gamma */ 5874 if (zerodiag) { 5875 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5876 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5877 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5878 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5879 } 5880 /* BDDC */ 5881 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5882 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5883 5884 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5885 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5886 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5887 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5888 for (i=0;i<pcbddc->benign_n;i++) { 5889 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5890 } 5891 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5892 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5893 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5894 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5895 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5896 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5897 } 5898 PetscFunctionReturn(0); 5899 } 5900