1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 /* partial clean up */ 147 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 148 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 149 if (A != B) { 150 ierr = MatDestroy(&B);CHKERRQ(ierr); 151 } 152 153 /* get back data */ 154 if (ncc) *ncc = graph->ncc; 155 if (cc) { 156 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 157 for (i=0;i<graph->ncc;i++) { 158 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 159 } 160 *cc = cc_n; 161 } 162 /* clean up graph */ 163 graph->xadj = 0; 164 graph->adjncy = 0; 165 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "PCBDDCBenignCheck" 171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 172 { 173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 174 PC_IS* pcis = (PC_IS*)(pc->data); 175 IS dirIS = NULL; 176 PetscInt i; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 181 if (zerodiag) { 182 Mat A; 183 Vec vec3_N; 184 PetscScalar *vals; 185 const PetscInt *idxs; 186 PetscInt nz; 187 188 /* p0 */ 189 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 190 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 191 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 193 for (i=0;i<nz;i++) vals[i] = 1.; 194 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 195 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 196 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 197 /* v_I */ 198 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 199 for (i=0;i<nz;i++) vals[i] = 0.; 200 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 202 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 203 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 204 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 205 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 206 if (dirIS) { 207 PetscInt n; 208 209 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 210 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 211 for (i=0;i<n;i++) vals[i] = 0.; 212 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 213 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 214 } 215 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 216 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 218 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 219 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 220 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 221 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 222 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 223 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 224 } 225 ierr = PetscFree(vals);CHKERRQ(ierr); 226 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 227 } 228 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 229 230 /* check PCBDDCBenignGetOrSetP0 */ 231 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 232 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 233 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 234 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 235 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 236 for (i=0;i<pcbddc->benign_n;i++) { 237 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 238 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 239 } 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 247 { 248 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 249 IS pressures,zerodiag,*zerodiag_subs; 250 PetscInt nz,n; 251 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 256 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 258 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 259 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 260 Checks if all the pressure dofs in each subdomain have a zero diagonal 261 If not, a change of basis on pressures is not needed 262 since the local Schur complements are SPD 263 */ 264 has_null_pressures = PETSC_TRUE; 265 have_null = PETSC_TRUE; 266 if (pcbddc->n_ISForDofsLocal) { 267 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 268 269 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 270 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 271 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 272 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 273 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 274 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 275 if (!sorted) { 276 ierr = ISSort(pressures);CHKERRQ(ierr); 277 } 278 } else { 279 pressures = NULL; 280 } 281 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 282 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 283 /* TODO: add check for shared dofs and raise error */ 284 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 285 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 286 if (!sorted) { 287 ierr = ISSort(zerodiag);CHKERRQ(ierr); 288 } 289 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 290 if (!nz) { 291 if (n) have_null = PETSC_FALSE; 292 has_null_pressures = PETSC_FALSE; 293 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 294 } 295 recompute_zerodiag = PETSC_FALSE; 296 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 297 zerodiag_subs = NULL; 298 pcbddc->benign_n = 0; 299 if (has_null_pressures) { 300 IS *subs; 301 PetscInt nsubs,i; 302 303 subs = pcbddc->local_subs; 304 nsubs = pcbddc->n_local_subs; 305 if (nsubs > 1) { 306 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 307 for (i=0;i<nsubs;i++) { 308 ISLocalToGlobalMapping l2g; 309 IS t_zerodiag_subs; 310 PetscInt nl; 311 312 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 313 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 314 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 315 if (nl) { 316 PetscBool valid = PETSC_TRUE; 317 318 if (pressures) { 319 IS t_pressure_subs; 320 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 321 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 322 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 323 } 324 if (valid) { 325 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 326 pcbddc->benign_n++; 327 } else { 328 recompute_zerodiag = PETSC_TRUE; 329 } 330 } 331 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 332 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 333 } 334 } else { /* there's just one subdomain (or zero if they have not been detected */ 335 PetscBool valid = PETSC_TRUE; 336 if (pressures) { 337 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 338 } 339 if (valid) { 340 pcbddc->benign_n = 1; 341 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 342 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 343 zerodiag_subs[0] = zerodiag; 344 } 345 } 346 } 347 348 if (!pcbddc->benign_n) { 349 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 350 recompute_zerodiag = PETSC_FALSE; 351 has_null_pressures = PETSC_FALSE; 352 have_null = PETSC_FALSE; 353 } 354 355 /* final check for null pressures */ 356 if (zerodiag && pressures) { 357 PetscInt nz,np; 358 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 359 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 360 if (nz != np) have_null = PETSC_FALSE; 361 } 362 363 if (recompute_zerodiag) { 364 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 365 if (pcbddc->benign_n == 1) { 366 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 367 zerodiag = zerodiag_subs[0]; 368 } else { 369 PetscInt i,nzn,*new_idxs; 370 371 nzn = 0; 372 for (i=0;i<pcbddc->benign_n;i++) { 373 PetscInt ns; 374 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 375 nzn += ns; 376 } 377 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 378 nzn = 0; 379 for (i=0;i<pcbddc->benign_n;i++) { 380 PetscInt ns,*idxs; 381 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 382 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 383 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 384 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 nzn += ns; 386 } 387 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 388 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 389 } 390 have_null = PETSC_FALSE; 391 } 392 393 if (has_null_pressures) { 394 IS zerodiagc; 395 const PetscInt *idxs,*idxsc; 396 PetscInt i,s,*nnz; 397 398 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 399 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 400 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 401 /* local change of basis for pressures */ 402 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 403 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 404 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 405 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 406 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 407 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 408 for (i=0;i<pcbddc->benign_n;i++) { 409 PetscInt nzs,j; 410 411 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 413 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 414 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 415 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 } 417 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 418 ierr = PetscFree(nnz);CHKERRQ(ierr); 419 /* set identity on velocities */ 420 for (i=0;i<n-nz;i++) { 421 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 422 } 423 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 424 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 425 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 426 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 427 /* set change on pressures */ 428 for (s=0;s<pcbddc->benign_n;s++) { 429 PetscScalar *array; 430 PetscInt nzs; 431 432 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 433 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 434 for (i=0;i<nzs-1;i++) { 435 PetscScalar vals[2]; 436 PetscInt cols[2]; 437 438 cols[0] = idxs[i]; 439 cols[1] = idxs[nzs-1]; 440 vals[0] = 1.; 441 vals[1] = 1.; 442 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 443 } 444 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 445 for (i=0;i<nzs-1;i++) array[i] = -1.; 446 array[nzs-1] = 1.; 447 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 448 /* store local idxs for p0 */ 449 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 450 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 451 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 452 ierr = PetscFree(array);CHKERRQ(ierr); 453 } 454 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 /* TODO: need optimization? */ 457 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 458 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 459 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 461 /* store global idxs for p0 */ 462 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 465 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 466 467 /* determines if the coarse solver will be singular or not */ 468 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 469 *zerodiaglocal = zerodiag; 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 476 { 477 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 if (!pcbddc->benign_sf) { 482 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 483 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 484 } 485 if (get) { /* use SF to get values */ 486 PetscScalar *array; 487 488 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 489 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 490 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 493 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 503 { 504 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 /* TODO: add error checking 509 - avoid nested pop (or push) calls. 510 - cannot push before pop. 511 - cannot call this if pcbddc->local_mat is NULL 512 */ 513 if (!pcbddc->benign_n) { 514 PetscFunctionReturn(0); 515 } 516 if (pop) { 517 IS is_p0; 518 MatReuse reuse; 519 520 /* extract B_0 */ 521 reuse = MAT_INITIAL_MATRIX; 522 if (pcbddc->benign_B0) { 523 reuse = MAT_REUSE_MATRIX; 524 } 525 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 526 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 527 /* remove rows and cols from local problem */ 528 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 529 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 530 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 531 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 532 } else { /* push */ 533 PetscInt i; 534 535 for (i=0;i<pcbddc->benign_n;i++) { 536 PetscScalar *B0_vals; 537 PetscInt *B0_cols,B0_ncol; 538 539 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 540 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 541 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 542 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 543 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 544 } 545 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "PCBDDCAdaptiveSelection" 553 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 554 { 555 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 556 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 557 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 558 PetscBLASInt *B_iwork,*B_ifail; 559 PetscScalar *work,lwork; 560 PetscScalar *St,*S,*eigv; 561 PetscScalar *Sarray,*Starray; 562 PetscReal *eigs,thresh; 563 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 564 PetscBool allocated_S_St; 565 #if defined(PETSC_USE_COMPLEX) 566 PetscReal *rwork; 567 #endif 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 if (!sub_schurs->use_mumps) { 572 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 573 } 574 575 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 576 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 577 } 578 579 if (pcbddc->dbg_flag) { 580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 588 } 589 590 /* max size of subsets */ 591 mss = 0; 592 for (i=0;i<sub_schurs->n_subs;i++) { 593 PetscInt subset_size; 594 595 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 596 mss = PetscMax(mss,subset_size); 597 } 598 599 /* min/max and threshold */ 600 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 601 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 602 nmax = PetscMax(nmin,nmax); 603 allocated_S_St = PETSC_FALSE; 604 if (nmin) { 605 allocated_S_St = PETSC_TRUE; 606 } 607 608 /* allocate lapack workspace */ 609 cum = cum2 = 0; 610 maxneigs = 0; 611 for (i=0;i<sub_schurs->n_subs;i++) { 612 PetscInt n,subset_size; 613 614 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 615 n = PetscMin(subset_size,nmax); 616 cum += subset_size; 617 cum2 += subset_size*n; 618 maxneigs = PetscMax(maxneigs,n); 619 } 620 if (mss) { 621 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 622 PetscBLASInt B_itype = 1; 623 PetscBLASInt B_N = mss; 624 PetscReal zero = 0.0; 625 PetscReal eps = 0.0; /* dlamch? */ 626 627 B_lwork = -1; 628 S = NULL; 629 St = NULL; 630 eigs = NULL; 631 eigv = NULL; 632 B_iwork = NULL; 633 B_ifail = NULL; 634 #if defined(PETSC_USE_COMPLEX) 635 rwork = NULL; 636 #endif 637 thresh = 1.0; 638 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 639 #if defined(PETSC_USE_COMPLEX) 640 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 641 #else 642 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 643 #endif 644 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 646 } else { 647 /* TODO */ 648 } 649 } else { 650 lwork = 0; 651 } 652 653 nv = 0; 654 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 655 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 656 } 657 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 658 if (allocated_S_St) { 659 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 660 } 661 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 662 #if defined(PETSC_USE_COMPLEX) 663 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 664 #endif 665 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 666 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 667 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 668 nv+cum,&pcbddc->adaptive_constraints_idxs, 669 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 670 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 671 672 maxneigs = 0; 673 cum = cum2 = cumarray = 0; 674 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 675 pcbddc->adaptive_constraints_data_ptr[0] = 0; 676 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 677 const PetscInt *idxs; 678 679 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 680 for (cum=0;cum<nv;cum++) { 681 pcbddc->adaptive_constraints_n[cum] = 1; 682 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 683 pcbddc->adaptive_constraints_data[cum] = 1.0; 684 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 685 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 686 } 687 cum2 = cum; 688 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 689 } 690 691 if (mss) { /* multilevel */ 692 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 693 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 694 } 695 696 thresh = pcbddc->adaptive_threshold; 697 for (i=0;i<sub_schurs->n_subs;i++) { 698 const PetscInt *idxs; 699 PetscReal infty = PETSC_MAX_REAL; 700 PetscInt j,subset_size,eigs_start = 0; 701 PetscBLASInt B_N; 702 PetscBool same_data = PETSC_FALSE; 703 704 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 705 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 706 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 707 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 708 if (sub_schurs->is_hermitian) { 709 PetscInt j,k; 710 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 711 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 712 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 713 } 714 for (j=0;j<subset_size;j++) { 715 for (k=j;k<subset_size;k++) { 716 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 717 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 718 } 719 } 720 } else { 721 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 722 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 723 } 724 } else { 725 S = Sarray + cumarray; 726 St = Starray + cumarray; 727 } 728 729 /* see if we can save some work */ 730 if (sub_schurs->n_subs == 1) { 731 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 732 } 733 734 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 735 B_neigs = 0; 736 } else { 737 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 738 PetscBLASInt B_itype = 1; 739 PetscBLASInt B_IL, B_IU; 740 PetscReal eps = -1.0; /* dlamch? */ 741 PetscInt nmin_s; 742 743 if (pcbddc->dbg_flag) { 744 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 745 } 746 747 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 748 if (thresh > 1.+PETSC_SMALL) { 749 750 /* ask for eigenvalues larger than thresh */ 751 #if defined(PETSC_USE_COMPLEX) 752 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 753 #else 754 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 755 #endif 756 } else { 757 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 758 B_IL = 1; 759 #if defined(PETSC_USE_COMPLEX) 760 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 761 #else 762 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 763 #endif 764 } 765 ierr = PetscFPTrapPop();CHKERRQ(ierr); 766 if (B_ierr) { 767 if (B_ierr < 0 ) { 768 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 769 } else if (B_ierr <= B_N) { 770 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 771 } else { 772 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 773 } 774 } 775 776 if (B_neigs > nmax) { 777 if (pcbddc->dbg_flag) { 778 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 779 } 780 eigs_start = B_neigs -nmax; 781 B_neigs = nmax; 782 } 783 784 nmin_s = PetscMin(nmin,B_N); 785 if (B_neigs < nmin_s) { 786 PetscBLASInt B_neigs2; 787 788 B_IU = B_N - B_neigs; 789 B_IL = B_N - nmin_s + 1; 790 if (pcbddc->dbg_flag) { 791 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 792 } 793 if (sub_schurs->is_hermitian) { 794 PetscInt j; 795 for (j=0;j<subset_size;j++) { 796 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 797 } 798 for (j=0;j<subset_size;j++) { 799 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 800 } 801 } else { 802 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 803 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 804 } 805 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 806 #if defined(PETSC_USE_COMPLEX) 807 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 808 #else 809 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 810 #endif 811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 812 B_neigs += B_neigs2; 813 } 814 if (B_ierr) { 815 if (B_ierr < 0 ) { 816 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 817 } else if (B_ierr <= B_N) { 818 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 819 } else { 820 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 821 } 822 } 823 if (pcbddc->dbg_flag) { 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 825 for (j=0;j<B_neigs;j++) { 826 if (eigs[j] == 0.0) { 827 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 828 } else { 829 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 830 } 831 } 832 } 833 } else { 834 /* TODO */ 835 } 836 } 837 maxneigs = PetscMax(B_neigs,maxneigs); 838 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 839 if (B_neigs) { 840 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 841 842 if (pcbddc->dbg_flag > 1) { 843 PetscInt ii; 844 for (ii=0;ii<B_neigs;ii++) { 845 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 846 for (j=0;j<B_N;j++) { 847 #if defined(PETSC_USE_COMPLEX) 848 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 849 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 851 #else 852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 853 #endif 854 } 855 } 856 } 857 #if 0 858 for (j=0;j<B_neigs;j++) { 859 PetscBLASInt Blas_N,Blas_one = 1.0; 860 PetscScalar norm; 861 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 862 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 863 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 864 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 865 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 866 } else { 867 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 868 } 869 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 870 } 871 #endif 872 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 873 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 874 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 875 cum++; 876 } 877 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 878 /* shift for next computation */ 879 cumarray += subset_size*subset_size; 880 } 881 if (pcbddc->dbg_flag) { 882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 883 } 884 885 if (mss) { 886 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 887 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 888 /* destroy matrices (junk) */ 889 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 890 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 891 } 892 if (allocated_S_St) { 893 ierr = PetscFree2(S,St);CHKERRQ(ierr); 894 } 895 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 896 #if defined(PETSC_USE_COMPLEX) 897 ierr = PetscFree(rwork);CHKERRQ(ierr); 898 #endif 899 if (pcbddc->dbg_flag) { 900 PetscInt maxneigs_r; 901 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 903 } 904 PetscFunctionReturn(0); 905 } 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "PCBDDCSetUpSolvers" 909 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 910 { 911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 912 PetscScalar *coarse_submat_vals; 913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 /* Setup local scatters R_to_B and (optionally) R_to_D */ 917 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 918 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 919 920 /* Setup local neumann solver ksp_R */ 921 /* PCBDDCSetUpLocalScatters should be called first! */ 922 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 923 924 /* Change global null space passed in by the user if change of basis has been requested */ 925 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 926 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 927 } 928 929 /* 930 Setup local correction and local part of coarse basis. 931 Gives back the dense local part of the coarse matrix in column major ordering 932 */ 933 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 934 935 /* Compute total number of coarse nodes and setup coarse solver */ 936 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 937 938 /* free */ 939 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "PCBDDCResetCustomization" 945 PetscErrorCode PCBDDCResetCustomization(PC pc) 946 { 947 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 952 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 953 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 954 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 955 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 956 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 957 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 958 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 959 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 960 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 961 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 962 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "PCBDDCResetTopography" 968 PetscErrorCode PCBDDCResetTopography(PC pc) 969 { 970 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 971 PetscInt i; 972 PetscErrorCode ierr; 973 974 PetscFunctionBegin; 975 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 976 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 977 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 978 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 979 for (i=0;i<pcbddc->n_local_subs;i++) { 980 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 981 } 982 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 983 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "PCBDDCResetSolvers" 989 PetscErrorCode PCBDDCResetSolvers(PC pc) 990 { 991 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 992 PetscScalar *array; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 997 if (pcbddc->coarse_phi_B) { 998 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 999 ierr = PetscFree(array);CHKERRQ(ierr); 1000 } 1001 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1002 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1003 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1004 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1005 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1006 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1007 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1008 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1009 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1010 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1011 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1012 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1013 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1014 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1015 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1016 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1017 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1018 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1019 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1020 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1021 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1022 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1023 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1024 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1025 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1026 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1027 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1028 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1029 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1035 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1036 { 1037 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1038 PC_IS *pcis = (PC_IS*)pc->data; 1039 VecType impVecType; 1040 PetscInt n_constraints,n_R,old_size; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 if (!pcbddc->ConstraintMatrix) { 1045 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1046 } 1047 /* get sizes */ 1048 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1049 n_R = pcis->n - pcbddc->n_vertices; 1050 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1051 /* local work vectors (try to avoid unneeded work)*/ 1052 /* R nodes */ 1053 old_size = -1; 1054 if (pcbddc->vec1_R) { 1055 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1056 } 1057 if (n_R != old_size) { 1058 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1059 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1060 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1061 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1062 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1063 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1064 } 1065 /* local primal dofs */ 1066 old_size = -1; 1067 if (pcbddc->vec1_P) { 1068 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1069 } 1070 if (pcbddc->local_primal_size != old_size) { 1071 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1072 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1073 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1074 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1075 } 1076 /* local explicit constraints */ 1077 old_size = -1; 1078 if (pcbddc->vec1_C) { 1079 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1080 } 1081 if (n_constraints && n_constraints != old_size) { 1082 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1083 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1084 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1085 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "PCBDDCSetUpCorrection" 1092 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1093 { 1094 PetscErrorCode ierr; 1095 /* pointers to pcis and pcbddc */ 1096 PC_IS* pcis = (PC_IS*)pc->data; 1097 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1098 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1099 /* submatrices of local problem */ 1100 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1101 /* submatrices of benign trick */ 1102 Mat B0_V = NULL; 1103 /* submatrices of local coarse problem */ 1104 Mat S_VV,S_CV,S_VC,S_CC; 1105 /* working matrices */ 1106 Mat C_CR; 1107 /* additional working stuff */ 1108 PC pc_R; 1109 Mat F; 1110 PetscBool isLU,isCHOL,isILU; 1111 1112 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1113 PetscScalar *work; 1114 PetscInt *idx_V_B; 1115 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1116 PetscInt i,n_R,n_D,n_B; 1117 1118 /* some shortcuts to scalars */ 1119 PetscScalar one=1.0,m_one=-1.0; 1120 1121 PetscFunctionBegin; 1122 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1123 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1124 } 1125 1126 /* Set Non-overlapping dimensions */ 1127 n_vertices = pcbddc->n_vertices; 1128 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1129 n_B = pcis->n_B; 1130 n_D = pcis->n - n_B; 1131 n_R = pcis->n - n_vertices; 1132 1133 /* vertices in boundary numbering */ 1134 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1135 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1136 if (i != n_vertices) { 1137 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1138 } 1139 1140 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1141 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1143 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1145 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1147 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1148 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1149 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1150 1151 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1152 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1153 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1154 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1155 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1156 lda_rhs = n_R; 1157 if (isLU || isILU || isCHOL) { 1158 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1159 } else if (sub_schurs->reuse_mumps) { 1160 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1161 MatFactorType type; 1162 1163 F = reuse_mumps->F; 1164 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1165 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1166 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1167 } else { 1168 F = NULL; 1169 } 1170 1171 /* allocate workspace */ 1172 n = 0; 1173 if (n_constraints) { 1174 n += lda_rhs*n_constraints; 1175 } 1176 if (n_vertices) { 1177 n = PetscMax(2*lda_rhs*n_vertices,n); 1178 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1179 } 1180 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1181 1182 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1183 if (n_constraints) { 1184 Mat M1,M2,M3; 1185 Mat auxmat; 1186 IS is_aux; 1187 PetscScalar *array,*array2; 1188 1189 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1190 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1191 1192 /* Extract constraints on R nodes: C_{CR} */ 1193 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1194 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1195 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1196 1197 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1198 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1199 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1200 for (i=0;i<n_constraints;i++) { 1201 const PetscScalar *row_cmat_values; 1202 const PetscInt *row_cmat_indices; 1203 PetscInt size_of_constraint,j; 1204 1205 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1206 for (j=0;j<size_of_constraint;j++) { 1207 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1208 } 1209 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1210 } 1211 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1212 if (F) { 1213 Mat B; 1214 1215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1216 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1217 ierr = MatDestroy(&B);CHKERRQ(ierr); 1218 } else { 1219 PetscScalar *marr; 1220 1221 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1222 for (i=0;i<n_constraints;i++) { 1223 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1224 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1226 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1227 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1228 } 1229 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1230 } 1231 if (!pcbddc->switch_static) { 1232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1233 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1234 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1235 for (i=0;i<n_constraints;i++) { 1236 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1237 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1238 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1241 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1242 } 1243 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1244 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1245 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1246 } else { 1247 if (lda_rhs != n_R) { 1248 IS dummy; 1249 1250 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1251 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1252 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1253 } else { 1254 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1255 pcbddc->local_auxmat2 = local_auxmat2_R; 1256 } 1257 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1258 } 1259 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1260 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1261 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1262 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1263 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1264 if (isCHOL) { 1265 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1266 } else { 1267 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1268 } 1269 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1270 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1271 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1272 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1273 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1274 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1275 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1276 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1277 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1278 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1279 } 1280 /* Get submatrices from subdomain matrix */ 1281 if (pcbddc->benign_n) { 1282 IS dummy; 1283 Mat B0_R; 1284 PetscReal norm; 1285 1286 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1287 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1288 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1289 if (norm > PETSC_SMALL) { 1290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1291 } 1292 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1293 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1294 } 1295 1296 if (n_vertices) { 1297 IS is_aux; 1298 1299 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1300 IS tis; 1301 1302 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1303 ierr = ISSort(tis);CHKERRQ(ierr); 1304 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1305 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1306 } else { 1307 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1308 } 1309 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1312 if (pcbddc->benign_n) { 1313 IS dummy; 1314 1315 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1316 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1317 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1318 } 1319 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1320 } 1321 1322 /* Matrix of coarse basis functions (local) */ 1323 if (pcbddc->coarse_phi_B) { 1324 PetscInt on_B,on_primal,on_D=n_D; 1325 if (pcbddc->coarse_phi_D) { 1326 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1329 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1330 PetscScalar *marray; 1331 1332 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1333 ierr = PetscFree(marray);CHKERRQ(ierr); 1334 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1335 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1336 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1338 } 1339 } 1340 1341 if (!pcbddc->coarse_phi_B) { 1342 PetscScalar *marray; 1343 1344 n = n_B*pcbddc->local_primal_size; 1345 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1346 n += n_D*pcbddc->local_primal_size; 1347 } 1348 if (!pcbddc->symmetric_primal) { 1349 n *= 2; 1350 } 1351 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1352 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1353 n = n_B*pcbddc->local_primal_size; 1354 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1355 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1356 n += n_D*pcbddc->local_primal_size; 1357 } 1358 if (!pcbddc->symmetric_primal) { 1359 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1360 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1361 n = n_B*pcbddc->local_primal_size; 1362 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1363 } 1364 } else { 1365 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1366 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1368 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1369 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1370 } 1371 } 1372 } 1373 1374 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1375 p0_lidx_I = NULL; 1376 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1377 const PetscInt *idxs; 1378 1379 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1380 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1381 for (i=0;i<pcbddc->benign_n;i++) { 1382 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1383 } 1384 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1385 } 1386 1387 /* vertices */ 1388 if (n_vertices) { 1389 1390 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1391 1392 if (n_R) { 1393 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1394 PetscBLASInt B_N,B_one = 1; 1395 PetscScalar *x,*y; 1396 PetscBool isseqaij; 1397 1398 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1399 if (lda_rhs == n_R) { 1400 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1401 } else { 1402 Mat A_RVt; 1403 PetscScalar *array; 1404 1405 /* TODO optimize */ 1406 ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr); 1407 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1408 ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1409 ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr); 1410 for (i=0;i<n_vertices;i++) { 1411 ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr); 1412 } 1413 ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr); 1414 ierr = MatDestroy(&A_RVt);CHKERRQ(ierr); 1415 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr); 1416 } 1417 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1418 if (F) { /* TODO could be optimized for symmetric problems */ 1419 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1420 } else { 1421 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1422 for (i=0;i<n_vertices;i++) { 1423 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1424 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1425 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1427 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1428 } 1429 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1430 } 1431 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1432 /* S_VV and S_CV */ 1433 if (n_constraints) { 1434 Mat B; 1435 1436 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1437 for (i=0;i<n_vertices;i++) { 1438 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1439 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1440 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1443 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1444 } 1445 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1446 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1447 ierr = MatDestroy(&B);CHKERRQ(ierr); 1448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1449 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1450 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1451 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1452 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1453 ierr = MatDestroy(&B);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1456 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1457 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1458 } 1459 if (lda_rhs != n_R) { 1460 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1461 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1462 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1463 } 1464 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1465 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1466 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1467 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1468 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1469 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1470 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1471 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1472 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1473 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1474 } else { 1475 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1476 } 1477 for (i=0;i<pcbddc->benign_n;i++) { 1478 const PetscScalar *vals; 1479 const PetscInt *idxs; 1480 PetscInt n,j,primal_idx; 1481 1482 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1483 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1484 for (j=0;j<n;j++) { 1485 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1486 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1487 } 1488 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1489 } 1490 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1491 1492 /* coarse basis functions */ 1493 for (i=0;i<n_vertices;i++) { 1494 PetscScalar *y; 1495 1496 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1497 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1498 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 y[n_B*i+idx_V_B[i]] = 1.0; 1502 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1503 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1504 1505 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1506 PetscInt j; 1507 1508 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1509 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1510 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1513 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1514 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1515 } 1516 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1517 } 1518 /* if n_R == 0 the object is not destroyed */ 1519 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1520 } 1521 1522 if (n_constraints) { 1523 Mat B; 1524 1525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1526 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1527 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1528 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1529 if (n_vertices) { 1530 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1531 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1532 } else { 1533 Mat S_VCt; 1534 1535 if (lda_rhs != n_R) { 1536 ierr = MatDestroy(&B);CHKERRQ(ierr); 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1538 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1539 } 1540 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1541 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1542 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1543 } 1544 } 1545 ierr = MatDestroy(&B);CHKERRQ(ierr); 1546 /* coarse basis functions */ 1547 for (i=0;i<n_constraints;i++) { 1548 PetscScalar *y; 1549 1550 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1551 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1552 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1553 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1556 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1558 PetscInt j; 1559 1560 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1561 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1562 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1565 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1566 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1567 } 1568 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1569 } 1570 } 1571 if (n_constraints) { 1572 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1573 } 1574 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1575 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1576 1577 /* compute other basis functions for non-symmetric problems */ 1578 if (!pcbddc->symmetric_primal) { 1579 Mat B_V=NULL,B_C=NULL; 1580 PetscScalar *marray; 1581 1582 if (n_constraints) { 1583 Mat S_CCT,C_CRT; 1584 1585 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1586 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1587 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1588 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1589 if (n_vertices) { 1590 Mat S_VCT; 1591 1592 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1593 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1594 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1595 } 1596 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 1597 } 1598 if (n_vertices && n_R) { 1599 PetscScalar *av,*marray; 1600 const PetscInt *xadj,*adjncy; 1601 PetscInt n; 1602 PetscBool flg_row; 1603 1604 /* B_V = B_V - A_VR^T */ 1605 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1606 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1607 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 1608 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1609 for (i=0;i<n;i++) { 1610 PetscInt j; 1611 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 1612 } 1613 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1614 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1615 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1616 } 1617 1618 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1619 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1620 for (i=0;i<n_vertices;i++) { 1621 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 1622 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1623 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1624 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1625 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1626 } 1627 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1628 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 1629 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 1630 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 1631 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1632 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1633 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1634 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1635 } 1636 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 1637 /* coarse basis functions */ 1638 for (i=0;i<pcbddc->local_primal_size;i++) { 1639 PetscScalar *y; 1640 1641 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1642 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1643 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1644 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 if (i<n_vertices) { 1647 y[n_B*i+idx_V_B[i]] = 1.0; 1648 } 1649 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1650 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1651 1652 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1653 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1658 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1659 } 1660 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1661 } 1662 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1663 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1664 } 1665 /* free memory */ 1666 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1667 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1668 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1669 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1670 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1671 ierr = PetscFree(work);CHKERRQ(ierr); 1672 if (n_vertices) { 1673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1674 } 1675 if (n_constraints) { 1676 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1677 } 1678 /* Checking coarse_sub_mat and coarse basis functios */ 1679 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1680 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1681 if (pcbddc->dbg_flag) { 1682 Mat coarse_sub_mat; 1683 Mat AUXMAT,TM1,TM2,TM3,TM4; 1684 Mat coarse_phi_D,coarse_phi_B; 1685 Mat coarse_psi_D,coarse_psi_B; 1686 Mat A_II,A_BB,A_IB,A_BI; 1687 Mat C_B,CPHI; 1688 IS is_dummy; 1689 Vec mones; 1690 MatType checkmattype=MATSEQAIJ; 1691 PetscReal real_value; 1692 1693 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1694 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1695 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1696 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1697 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1698 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1699 if (!pcbddc->symmetric_primal) { 1700 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1701 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1702 } 1703 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1704 1705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1706 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1708 if (!pcbddc->symmetric_primal) { 1709 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1710 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1712 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1713 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1714 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1715 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1716 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1717 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1718 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1719 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1720 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1721 } else { 1722 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1723 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1724 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1725 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1726 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1727 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1728 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1729 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1730 } 1731 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1732 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1733 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1734 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1735 if (pcbddc->benign_n) { 1736 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1737 PetscScalar *data,*data2; 1738 PetscInt j; 1739 1740 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1741 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1742 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1743 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1744 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1745 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1746 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1747 for (j=0;j<pcbddc->benign_n;j++) { 1748 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1749 for (i=0;i<pcbddc->local_primal_size;i++) { 1750 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1751 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1752 } 1753 } 1754 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1755 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1756 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1757 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1758 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1759 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1760 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1761 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1762 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1763 } 1764 #if 0 1765 { 1766 PetscViewer viewer; 1767 char filename[256]; 1768 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 1769 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1770 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1771 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 1772 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 1773 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 1774 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1775 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 1776 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 1777 if (pcbddc->coarse_phi_D) { 1778 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 1779 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 1780 } 1781 if (pcbddc->coarse_psi_B) { 1782 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 1783 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 1784 } 1785 if (pcbddc->coarse_phi_B) { 1786 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 1787 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 1788 } 1789 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1790 } 1791 #endif 1792 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1793 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1796 1797 /* check constraints */ 1798 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1799 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1800 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1801 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1802 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1803 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1804 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1805 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1807 if (!pcbddc->symmetric_primal) { 1808 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1809 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1810 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1811 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1813 } 1814 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1815 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1816 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1817 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1818 } 1819 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1820 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1821 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1822 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1823 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1824 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1825 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1826 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1827 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1828 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1829 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1830 if (!pcbddc->symmetric_primal) { 1831 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1832 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1833 } 1834 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1835 } 1836 /* get back data */ 1837 *coarse_submat_vals_n = coarse_submat_vals; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1843 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1844 { 1845 Mat *work_mat; 1846 IS isrow_s,iscol_s; 1847 PetscBool rsorted,csorted; 1848 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1849 PetscErrorCode ierr; 1850 1851 PetscFunctionBegin; 1852 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1853 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1854 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1855 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1856 1857 if (!rsorted) { 1858 const PetscInt *idxs; 1859 PetscInt *idxs_sorted,i; 1860 1861 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1862 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1863 for (i=0;i<rsize;i++) { 1864 idxs_perm_r[i] = i; 1865 } 1866 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1867 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1868 for (i=0;i<rsize;i++) { 1869 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1870 } 1871 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1872 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1873 } else { 1874 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1875 isrow_s = isrow; 1876 } 1877 1878 if (!csorted) { 1879 if (isrow == iscol) { 1880 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1881 iscol_s = isrow_s; 1882 } else { 1883 const PetscInt *idxs; 1884 PetscInt *idxs_sorted,i; 1885 1886 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1887 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1888 for (i=0;i<csize;i++) { 1889 idxs_perm_c[i] = i; 1890 } 1891 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1892 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1893 for (i=0;i<csize;i++) { 1894 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1895 } 1896 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1897 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1898 } 1899 } else { 1900 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1901 iscol_s = iscol; 1902 } 1903 1904 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1905 1906 if (!rsorted || !csorted) { 1907 Mat new_mat; 1908 IS is_perm_r,is_perm_c; 1909 1910 if (!rsorted) { 1911 PetscInt *idxs_r,i; 1912 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1913 for (i=0;i<rsize;i++) { 1914 idxs_r[idxs_perm_r[i]] = i; 1915 } 1916 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1917 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1918 } else { 1919 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1920 } 1921 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1922 1923 if (!csorted) { 1924 if (isrow_s == iscol_s) { 1925 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1926 is_perm_c = is_perm_r; 1927 } else { 1928 PetscInt *idxs_c,i; 1929 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1930 for (i=0;i<csize;i++) { 1931 idxs_c[idxs_perm_c[i]] = i; 1932 } 1933 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1934 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1935 } 1936 } else { 1937 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1938 } 1939 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1940 1941 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1942 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1943 work_mat[0] = new_mat; 1944 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1945 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1946 } 1947 1948 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1949 *B = work_mat[0]; 1950 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1951 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1952 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1958 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1959 { 1960 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1961 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1962 Mat new_mat; 1963 IS is_local,is_global; 1964 PetscInt local_size; 1965 PetscBool isseqaij; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1970 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1971 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1972 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1973 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1974 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1975 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1976 1977 /* check */ 1978 if (pcbddc->dbg_flag) { 1979 Vec x,x_change; 1980 PetscReal error; 1981 1982 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1983 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1984 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1985 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1988 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1989 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1990 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1991 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1994 ierr = VecDestroy(&x);CHKERRQ(ierr); 1995 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1996 } 1997 1998 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1999 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2000 if (isseqaij) { 2001 Mat M; 2002 2003 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2004 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2005 ierr = MatDestroy(&M);CHKERRQ(ierr); 2006 } else { 2007 Mat work_mat,M; 2008 2009 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2010 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2011 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2012 ierr = MatDestroy(&M);CHKERRQ(ierr); 2013 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2014 } 2015 if (matis->A->symmetric_set) { 2016 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2017 #if !defined(PETSC_USE_COMPLEX) 2018 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2019 #endif 2020 } 2021 /* 2022 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2023 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2024 */ 2025 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2031 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2032 { 2033 PC_IS* pcis = (PC_IS*)(pc->data); 2034 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2035 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2036 PetscInt *idx_R_local=NULL; 2037 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2038 PetscInt vbs,bs; 2039 PetscBT bitmask=NULL; 2040 PetscErrorCode ierr; 2041 2042 PetscFunctionBegin; 2043 /* 2044 No need to setup local scatters if 2045 - primal space is unchanged 2046 AND 2047 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2048 AND 2049 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2050 */ 2051 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2052 PetscFunctionReturn(0); 2053 } 2054 /* destroy old objects */ 2055 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2056 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2057 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2058 /* Set Non-overlapping dimensions */ 2059 n_B = pcis->n_B; 2060 n_D = pcis->n - n_B; 2061 n_vertices = pcbddc->n_vertices; 2062 2063 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2064 2065 /* create auxiliary bitmask and allocate workspace */ 2066 if (!sub_schurs->reuse_mumps) { 2067 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2068 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2069 for (i=0;i<n_vertices;i++) { 2070 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2071 } 2072 2073 for (i=0, n_R=0; i<pcis->n; i++) { 2074 if (!PetscBTLookup(bitmask,i)) { 2075 idx_R_local[n_R++] = i; 2076 } 2077 } 2078 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2079 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2080 2081 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2082 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2083 } 2084 2085 /* Block code */ 2086 vbs = 1; 2087 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2088 if (bs>1 && !(n_vertices%bs)) { 2089 PetscBool is_blocked = PETSC_TRUE; 2090 PetscInt *vary; 2091 if (!sub_schurs->reuse_mumps) { 2092 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2093 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2094 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2095 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2096 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2097 for (i=0; i<pcis->n/bs; i++) { 2098 if (vary[i]!=0 && vary[i]!=bs) { 2099 is_blocked = PETSC_FALSE; 2100 break; 2101 } 2102 } 2103 ierr = PetscFree(vary);CHKERRQ(ierr); 2104 } else { 2105 /* Verify directly the R set */ 2106 for (i=0; i<n_R/bs; i++) { 2107 PetscInt j,node=idx_R_local[bs*i]; 2108 for (j=1; j<bs; j++) { 2109 if (node != idx_R_local[bs*i+j]-j) { 2110 is_blocked = PETSC_FALSE; 2111 break; 2112 } 2113 } 2114 } 2115 } 2116 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2117 vbs = bs; 2118 for (i=0;i<n_R/vbs;i++) { 2119 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2120 } 2121 } 2122 } 2123 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2124 if (sub_schurs->reuse_mumps) { 2125 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2126 2127 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2128 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2129 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2130 reuse_mumps->is_R = pcbddc->is_R_local; 2131 } else { 2132 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2133 } 2134 2135 /* print some info if requested */ 2136 if (pcbddc->dbg_flag) { 2137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2138 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2139 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2141 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2144 } 2145 2146 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2147 if (!sub_schurs->reuse_mumps) { 2148 IS is_aux1,is_aux2; 2149 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2150 2151 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2152 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2153 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2154 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2155 for (i=0; i<n_D; i++) { 2156 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2157 } 2158 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2159 for (i=0, j=0; i<n_R; i++) { 2160 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2161 aux_array1[j++] = i; 2162 } 2163 } 2164 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2165 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2166 for (i=0, j=0; i<n_B; i++) { 2167 if (!PetscBTLookup(bitmask,is_indices[i])) { 2168 aux_array2[j++] = i; 2169 } 2170 } 2171 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2172 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2173 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2174 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2175 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2176 2177 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2178 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2179 for (i=0, j=0; i<n_R; i++) { 2180 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2181 aux_array1[j++] = i; 2182 } 2183 } 2184 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2185 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2186 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2187 } 2188 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2189 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2190 } else { 2191 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2192 IS tis; 2193 PetscInt schur_size; 2194 2195 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2196 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2197 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2198 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2200 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2201 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2202 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2203 } 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2211 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2212 { 2213 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2214 PC_IS *pcis = (PC_IS*)pc->data; 2215 PC pc_temp; 2216 Mat A_RR; 2217 MatReuse reuse; 2218 PetscScalar m_one = -1.0; 2219 PetscReal value; 2220 PetscInt n_D,n_R; 2221 PetscBool use_exact,use_exact_reduced,issbaij; 2222 PetscErrorCode ierr; 2223 /* prefixes stuff */ 2224 char dir_prefix[256],neu_prefix[256],str_level[16]; 2225 size_t len; 2226 2227 PetscFunctionBegin; 2228 2229 /* compute prefixes */ 2230 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2231 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2232 if (!pcbddc->current_level) { 2233 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2234 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2235 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2236 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2237 } else { 2238 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2239 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2240 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2241 len -= 15; /* remove "pc_bddc_coarse_" */ 2242 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2243 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2244 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2245 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2246 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2247 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2248 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2249 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2250 } 2251 2252 /* DIRICHLET PROBLEM */ 2253 if (dirichlet) { 2254 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2255 if (pcbddc->local_mat->symmetric_set) { 2256 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2257 } 2258 /* Matrix for Dirichlet problem is pcis->A_II */ 2259 n_D = pcis->n - pcis->n_B; 2260 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2261 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2262 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2263 /* default */ 2264 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2265 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2266 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2267 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2268 if (issbaij) { 2269 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2270 } else { 2271 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2272 } 2273 /* Allow user's customization */ 2274 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2275 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2276 } 2277 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2278 if (sub_schurs->reuse_mumps) { 2279 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2280 2281 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2282 } 2283 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2284 if (!n_D) { 2285 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2286 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2287 } 2288 /* Set Up KSP for Dirichlet problem of BDDC */ 2289 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2290 /* set ksp_D into pcis data */ 2291 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2292 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2293 pcis->ksp_D = pcbddc->ksp_D; 2294 } 2295 2296 /* NEUMANN PROBLEM */ 2297 A_RR = 0; 2298 if (neumann) { 2299 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2300 PetscInt ibs,mbs; 2301 PetscBool issbaij; 2302 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2303 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2304 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2305 if (pcbddc->ksp_R) { /* already created ksp */ 2306 PetscInt nn_R; 2307 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2308 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2309 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2310 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2311 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2312 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2313 reuse = MAT_INITIAL_MATRIX; 2314 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2315 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2316 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2317 reuse = MAT_INITIAL_MATRIX; 2318 } else { /* safe to reuse the matrix */ 2319 reuse = MAT_REUSE_MATRIX; 2320 } 2321 } 2322 /* last check */ 2323 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2324 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2325 reuse = MAT_INITIAL_MATRIX; 2326 } 2327 } else { /* first time, so we need to create the matrix */ 2328 reuse = MAT_INITIAL_MATRIX; 2329 } 2330 /* extract A_RR */ 2331 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2332 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2333 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2334 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2335 if (matis->A == pcbddc->local_mat) { 2336 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2337 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2338 } else { 2339 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2340 } 2341 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2342 if (matis->A == pcbddc->local_mat) { 2343 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2344 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2345 } else { 2346 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2347 } 2348 } 2349 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2350 if (pcbddc->local_mat->symmetric_set) { 2351 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2352 } 2353 if (!pcbddc->ksp_R) { /* create object if not present */ 2354 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2355 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2356 /* default */ 2357 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2358 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2359 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2360 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2361 if (issbaij) { 2362 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2363 } else { 2364 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2365 } 2366 /* Allow user's customization */ 2367 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2368 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2369 } 2370 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2371 if (!n_R) { 2372 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2373 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2374 } 2375 /* Reuse MUMPS solver if it is present */ 2376 if (sub_schurs->reuse_mumps) { 2377 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2378 2379 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2380 } 2381 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2382 /* Set Up KSP for Neumann problem of BDDC */ 2383 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2384 } 2385 /* free Neumann problem's matrix */ 2386 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2387 2388 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2389 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2390 if (pcbddc->dbg_flag) { 2391 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2392 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2394 } 2395 if (dirichlet) { /* Dirichlet */ 2396 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2397 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2398 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2399 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2400 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2401 /* need to be adapted? */ 2402 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2403 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2404 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2405 /* print info */ 2406 if (pcbddc->dbg_flag) { 2407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2409 } 2410 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2411 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2412 } 2413 } 2414 if (neumann) { /* Neumann */ 2415 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2416 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2417 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2418 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2419 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2420 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2421 /* need to be adapted? */ 2422 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2423 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2424 /* print info */ 2425 if (pcbddc->dbg_flag) { 2426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2428 } 2429 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2430 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2431 } 2432 } 2433 } 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2439 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2440 { 2441 PetscErrorCode ierr; 2442 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2443 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2444 2445 PetscFunctionBegin; 2446 if (!sub_schurs->reuse_mumps) { 2447 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2448 } 2449 if (!pcbddc->switch_static) { 2450 if (applytranspose && pcbddc->local_auxmat1) { 2451 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2452 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2453 } 2454 if (!sub_schurs->reuse_mumps) { 2455 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2456 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2457 } else { 2458 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2459 2460 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2462 } 2463 } else { 2464 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2465 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2466 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2467 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2468 if (applytranspose && pcbddc->local_auxmat1) { 2469 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2470 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2471 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2472 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2473 } 2474 } 2475 if (!sub_schurs->reuse_mumps) { 2476 if (applytranspose) { 2477 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2478 } else { 2479 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2480 } 2481 } else { 2482 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2483 2484 if (applytranspose) { 2485 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2486 } else { 2487 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2488 } 2489 } 2490 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2491 if (!pcbddc->switch_static) { 2492 if (!sub_schurs->reuse_mumps) { 2493 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2494 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2495 } else { 2496 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2497 2498 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2499 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2500 } 2501 if (!applytranspose && pcbddc->local_auxmat1) { 2502 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2503 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2504 } 2505 } else { 2506 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2507 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2508 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2509 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 if (!applytranspose && pcbddc->local_auxmat1) { 2511 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2512 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2513 } 2514 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2515 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 } 2519 PetscFunctionReturn(0); 2520 } 2521 2522 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2523 #undef __FUNCT__ 2524 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2525 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2526 { 2527 PetscErrorCode ierr; 2528 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2529 PC_IS* pcis = (PC_IS*) (pc->data); 2530 const PetscScalar zero = 0.0; 2531 2532 PetscFunctionBegin; 2533 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2534 if (applytranspose) { 2535 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2536 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2537 } else { 2538 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2539 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2540 } 2541 2542 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2543 if (pcbddc->benign_n) { 2544 PetscScalar *array; 2545 PetscInt j; 2546 2547 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2548 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2549 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2550 } 2551 2552 /* start communications from local primal nodes to rhs of coarse solver */ 2553 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2554 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2555 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2556 2557 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2558 /* TODO remove null space when doing multilevel */ 2559 if (pcbddc->coarse_ksp) { 2560 Mat coarse_mat; 2561 Vec rhs,sol; 2562 MatNullSpace nullsp; 2563 2564 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2565 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2566 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2567 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2568 if (nullsp) { 2569 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2570 } 2571 if (applytranspose) { 2572 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2573 } else { 2574 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2575 } 2576 if (nullsp) { 2577 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2578 } 2579 } 2580 2581 /* Local solution on R nodes */ 2582 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2583 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2584 } 2585 2586 /* communications from coarse sol to local primal nodes */ 2587 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2588 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2589 2590 /* Sum contributions from two levels */ 2591 if (applytranspose) { 2592 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2593 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2594 } else { 2595 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2596 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2597 } 2598 /* store p0 */ 2599 if (pcbddc->benign_n) { 2600 PetscScalar *array; 2601 PetscInt j; 2602 2603 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2604 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2605 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2606 } 2607 PetscFunctionReturn(0); 2608 } 2609 2610 #undef __FUNCT__ 2611 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2612 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2613 { 2614 PetscErrorCode ierr; 2615 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2616 PetscScalar *array; 2617 Vec from,to; 2618 2619 PetscFunctionBegin; 2620 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2621 from = pcbddc->coarse_vec; 2622 to = pcbddc->vec1_P; 2623 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2624 Vec tvec; 2625 2626 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2627 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2628 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2629 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2630 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2631 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2632 } 2633 } else { /* from local to global -> put data in coarse right hand side */ 2634 from = pcbddc->vec1_P; 2635 to = pcbddc->coarse_vec; 2636 } 2637 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2638 PetscFunctionReturn(0); 2639 } 2640 2641 #undef __FUNCT__ 2642 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2643 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2644 { 2645 PetscErrorCode ierr; 2646 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2647 PetscScalar *array; 2648 Vec from,to; 2649 2650 PetscFunctionBegin; 2651 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2652 from = pcbddc->coarse_vec; 2653 to = pcbddc->vec1_P; 2654 } else { /* from local to global -> put data in coarse right hand side */ 2655 from = pcbddc->vec1_P; 2656 to = pcbddc->coarse_vec; 2657 } 2658 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2659 if (smode == SCATTER_FORWARD) { 2660 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2661 Vec tvec; 2662 2663 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2664 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2665 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2666 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2667 } 2668 } else { 2669 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2670 ierr = VecResetArray(from);CHKERRQ(ierr); 2671 } 2672 } 2673 PetscFunctionReturn(0); 2674 } 2675 2676 /* uncomment for testing purposes */ 2677 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2678 #undef __FUNCT__ 2679 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2680 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2681 { 2682 PetscErrorCode ierr; 2683 PC_IS* pcis = (PC_IS*)(pc->data); 2684 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2685 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2686 /* one and zero */ 2687 PetscScalar one=1.0,zero=0.0; 2688 /* space to store constraints and their local indices */ 2689 PetscScalar *constraints_data; 2690 PetscInt *constraints_idxs,*constraints_idxs_B; 2691 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2692 PetscInt *constraints_n; 2693 /* iterators */ 2694 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2695 /* BLAS integers */ 2696 PetscBLASInt lwork,lierr; 2697 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2698 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2699 /* reuse */ 2700 PetscInt olocal_primal_size,olocal_primal_size_cc; 2701 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2702 /* change of basis */ 2703 PetscBool qr_needed; 2704 PetscBT change_basis,qr_needed_idx; 2705 /* auxiliary stuff */ 2706 PetscInt *nnz,*is_indices; 2707 PetscInt ncc; 2708 /* some quantities */ 2709 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2710 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2711 2712 PetscFunctionBegin; 2713 /* Destroy Mat objects computed previously */ 2714 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2715 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2716 /* save info on constraints from previous setup (if any) */ 2717 olocal_primal_size = pcbddc->local_primal_size; 2718 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2719 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2720 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2721 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2722 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2723 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2724 2725 /* print some info */ 2726 if (pcbddc->dbg_flag) { 2727 IS vertices; 2728 PetscInt nv,nedges,nfaces; 2729 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2730 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2731 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2732 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2733 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2737 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2738 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2739 } 2740 2741 if (!pcbddc->adaptive_selection) { 2742 IS ISForVertices,*ISForFaces,*ISForEdges; 2743 MatNullSpace nearnullsp; 2744 const Vec *nearnullvecs; 2745 Vec *localnearnullsp; 2746 PetscScalar *array; 2747 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2748 PetscBool nnsp_has_cnst; 2749 /* LAPACK working arrays for SVD or POD */ 2750 PetscBool skip_lapack,boolforchange; 2751 PetscScalar *work; 2752 PetscReal *singular_vals; 2753 #if defined(PETSC_USE_COMPLEX) 2754 PetscReal *rwork; 2755 #endif 2756 #if defined(PETSC_MISSING_LAPACK_GESVD) 2757 PetscScalar *temp_basis,*correlation_mat; 2758 #else 2759 PetscBLASInt dummy_int=1; 2760 PetscScalar dummy_scalar=1.; 2761 #endif 2762 2763 /* Get index sets for faces, edges and vertices from graph */ 2764 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2765 /* free unneeded index sets */ 2766 if (!pcbddc->use_vertices) { 2767 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2768 } 2769 if (!pcbddc->use_edges) { 2770 for (i=0;i<n_ISForEdges;i++) { 2771 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2772 } 2773 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2774 n_ISForEdges = 0; 2775 } 2776 if (!pcbddc->use_faces) { 2777 for (i=0;i<n_ISForFaces;i++) { 2778 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2779 } 2780 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2781 n_ISForFaces = 0; 2782 } 2783 2784 #if defined(PETSC_USE_DEBUG) 2785 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2786 Also use_change_of_basis should be consistent among processors */ 2787 if (pcbddc->NullSpace) { 2788 PetscBool tbool[2],gbool[2]; 2789 2790 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2791 pcbddc->use_change_of_basis = PETSC_TRUE; 2792 if (!ISForEdges) { 2793 pcbddc->use_change_on_faces = PETSC_TRUE; 2794 } 2795 } 2796 tbool[0] = pcbddc->use_change_of_basis; 2797 tbool[1] = pcbddc->use_change_on_faces; 2798 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2799 pcbddc->use_change_of_basis = gbool[0]; 2800 pcbddc->use_change_on_faces = gbool[1]; 2801 } 2802 #endif 2803 2804 /* check if near null space is attached to global mat */ 2805 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2806 if (nearnullsp) { 2807 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2808 /* remove any stored info */ 2809 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2810 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2811 /* store information for BDDC solver reuse */ 2812 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2813 pcbddc->onearnullspace = nearnullsp; 2814 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2815 for (i=0;i<nnsp_size;i++) { 2816 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2817 } 2818 } else { /* if near null space is not provided BDDC uses constants by default */ 2819 nnsp_size = 0; 2820 nnsp_has_cnst = PETSC_TRUE; 2821 } 2822 /* get max number of constraints on a single cc */ 2823 max_constraints = nnsp_size; 2824 if (nnsp_has_cnst) max_constraints++; 2825 2826 /* 2827 Evaluate maximum storage size needed by the procedure 2828 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2829 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2830 There can be multiple constraints per connected component 2831 */ 2832 n_vertices = 0; 2833 if (ISForVertices) { 2834 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2835 } 2836 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2837 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2838 2839 total_counts = n_ISForFaces+n_ISForEdges; 2840 total_counts *= max_constraints; 2841 total_counts += n_vertices; 2842 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2843 2844 total_counts = 0; 2845 max_size_of_constraint = 0; 2846 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2847 IS used_is; 2848 if (i<n_ISForEdges) { 2849 used_is = ISForEdges[i]; 2850 } else { 2851 used_is = ISForFaces[i-n_ISForEdges]; 2852 } 2853 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2854 total_counts += j; 2855 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2856 } 2857 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2858 2859 /* get local part of global near null space vectors */ 2860 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2861 for (k=0;k<nnsp_size;k++) { 2862 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2863 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2864 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2865 } 2866 2867 /* whether or not to skip lapack calls */ 2868 skip_lapack = PETSC_TRUE; 2869 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2870 2871 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2872 if (!skip_lapack) { 2873 PetscScalar temp_work; 2874 2875 #if defined(PETSC_MISSING_LAPACK_GESVD) 2876 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2877 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2878 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2879 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2880 #if defined(PETSC_USE_COMPLEX) 2881 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2882 #endif 2883 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2884 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2885 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2886 lwork = -1; 2887 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2888 #if !defined(PETSC_USE_COMPLEX) 2889 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2890 #else 2891 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2892 #endif 2893 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2894 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2895 #else /* on missing GESVD */ 2896 /* SVD */ 2897 PetscInt max_n,min_n; 2898 max_n = max_size_of_constraint; 2899 min_n = max_constraints; 2900 if (max_size_of_constraint < max_constraints) { 2901 min_n = max_size_of_constraint; 2902 max_n = max_constraints; 2903 } 2904 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2905 #if defined(PETSC_USE_COMPLEX) 2906 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2907 #endif 2908 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2909 lwork = -1; 2910 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2911 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2913 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2914 #if !defined(PETSC_USE_COMPLEX) 2915 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2916 #else 2917 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2918 #endif 2919 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2920 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2921 #endif /* on missing GESVD */ 2922 /* Allocate optimal workspace */ 2923 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2924 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2925 } 2926 /* Now we can loop on constraining sets */ 2927 total_counts = 0; 2928 constraints_idxs_ptr[0] = 0; 2929 constraints_data_ptr[0] = 0; 2930 /* vertices */ 2931 if (n_vertices) { 2932 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2933 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2934 for (i=0;i<n_vertices;i++) { 2935 constraints_n[total_counts] = 1; 2936 constraints_data[total_counts] = 1.0; 2937 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2938 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2939 total_counts++; 2940 } 2941 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2942 n_vertices = total_counts; 2943 } 2944 2945 /* edges and faces */ 2946 total_counts_cc = total_counts; 2947 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2948 IS used_is; 2949 PetscBool idxs_copied = PETSC_FALSE; 2950 2951 if (ncc<n_ISForEdges) { 2952 used_is = ISForEdges[ncc]; 2953 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2954 } else { 2955 used_is = ISForFaces[ncc-n_ISForEdges]; 2956 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2957 } 2958 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2959 2960 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2961 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2962 /* change of basis should not be performed on local periodic nodes */ 2963 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2964 if (nnsp_has_cnst) { 2965 PetscScalar quad_value; 2966 2967 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2968 idxs_copied = PETSC_TRUE; 2969 2970 if (!pcbddc->use_nnsp_true) { 2971 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2972 } else { 2973 quad_value = 1.0; 2974 } 2975 for (j=0;j<size_of_constraint;j++) { 2976 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2977 } 2978 temp_constraints++; 2979 total_counts++; 2980 } 2981 for (k=0;k<nnsp_size;k++) { 2982 PetscReal real_value; 2983 PetscScalar *ptr_to_data; 2984 2985 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2986 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2987 for (j=0;j<size_of_constraint;j++) { 2988 ptr_to_data[j] = array[is_indices[j]]; 2989 } 2990 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2991 /* check if array is null on the connected component */ 2992 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2993 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2994 if (real_value > 0.0) { /* keep indices and values */ 2995 temp_constraints++; 2996 total_counts++; 2997 if (!idxs_copied) { 2998 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2999 idxs_copied = PETSC_TRUE; 3000 } 3001 } 3002 } 3003 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3004 valid_constraints = temp_constraints; 3005 if (!pcbddc->use_nnsp_true && temp_constraints) { 3006 if (temp_constraints == 1) { /* just normalize the constraint */ 3007 PetscScalar norm,*ptr_to_data; 3008 3009 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3010 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3011 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3012 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3013 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3014 } else { /* perform SVD */ 3015 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3016 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3017 3018 #if defined(PETSC_MISSING_LAPACK_GESVD) 3019 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3020 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3021 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3022 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3023 from that computed using LAPACKgesvd 3024 -> This is due to a different computation of eigenvectors in LAPACKheev 3025 -> The quality of the POD-computed basis will be the same */ 3026 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3027 /* Store upper triangular part of correlation matrix */ 3028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3029 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3030 for (j=0;j<temp_constraints;j++) { 3031 for (k=0;k<j+1;k++) { 3032 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3033 } 3034 } 3035 /* compute eigenvalues and eigenvectors of correlation matrix */ 3036 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3037 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3038 #if !defined(PETSC_USE_COMPLEX) 3039 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3040 #else 3041 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3042 #endif 3043 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3044 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3045 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3046 j = 0; 3047 while (j < temp_constraints && singular_vals[j] < tol) j++; 3048 total_counts = total_counts-j; 3049 valid_constraints = temp_constraints-j; 3050 /* scale and copy POD basis into used quadrature memory */ 3051 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3052 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3053 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3054 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3055 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3056 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3057 if (j<temp_constraints) { 3058 PetscInt ii; 3059 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3061 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3062 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3063 for (k=0;k<temp_constraints-j;k++) { 3064 for (ii=0;ii<size_of_constraint;ii++) { 3065 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3066 } 3067 } 3068 } 3069 #else /* on missing GESVD */ 3070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3071 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3072 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3073 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3074 #if !defined(PETSC_USE_COMPLEX) 3075 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3076 #else 3077 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3078 #endif 3079 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3080 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3081 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3082 k = temp_constraints; 3083 if (k > size_of_constraint) k = size_of_constraint; 3084 j = 0; 3085 while (j < k && singular_vals[k-j-1] < tol) j++; 3086 valid_constraints = k-j; 3087 total_counts = total_counts-temp_constraints+valid_constraints; 3088 #endif /* on missing GESVD */ 3089 } 3090 } 3091 /* update pointers information */ 3092 if (valid_constraints) { 3093 constraints_n[total_counts_cc] = valid_constraints; 3094 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3095 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3096 /* set change_of_basis flag */ 3097 if (boolforchange) { 3098 PetscBTSet(change_basis,total_counts_cc); 3099 } 3100 total_counts_cc++; 3101 } 3102 } 3103 /* free workspace */ 3104 if (!skip_lapack) { 3105 ierr = PetscFree(work);CHKERRQ(ierr); 3106 #if defined(PETSC_USE_COMPLEX) 3107 ierr = PetscFree(rwork);CHKERRQ(ierr); 3108 #endif 3109 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3110 #if defined(PETSC_MISSING_LAPACK_GESVD) 3111 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3112 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3113 #endif 3114 } 3115 for (k=0;k<nnsp_size;k++) { 3116 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3117 } 3118 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3119 /* free index sets of faces, edges and vertices */ 3120 for (i=0;i<n_ISForFaces;i++) { 3121 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3122 } 3123 if (n_ISForFaces) { 3124 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3125 } 3126 for (i=0;i<n_ISForEdges;i++) { 3127 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3128 } 3129 if (n_ISForEdges) { 3130 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3131 } 3132 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3133 } else { 3134 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3135 3136 total_counts = 0; 3137 n_vertices = 0; 3138 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3139 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3140 } 3141 max_constraints = 0; 3142 total_counts_cc = 0; 3143 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3144 total_counts += pcbddc->adaptive_constraints_n[i]; 3145 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3146 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3147 } 3148 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3149 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3150 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3151 constraints_data = pcbddc->adaptive_constraints_data; 3152 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3153 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3154 total_counts_cc = 0; 3155 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3156 if (pcbddc->adaptive_constraints_n[i]) { 3157 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3158 } 3159 } 3160 #if 0 3161 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3162 for (i=0;i<total_counts_cc;i++) { 3163 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3164 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3165 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3166 printf(" %d",constraints_idxs[j]); 3167 } 3168 printf("\n"); 3169 printf("number of cc: %d\n",constraints_n[i]); 3170 } 3171 for (i=0;i<n_vertices;i++) { 3172 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3173 } 3174 for (i=0;i<sub_schurs->n_subs;i++) { 3175 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3176 } 3177 #endif 3178 3179 max_size_of_constraint = 0; 3180 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3181 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3182 /* Change of basis */ 3183 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3184 if (pcbddc->use_change_of_basis) { 3185 for (i=0;i<sub_schurs->n_subs;i++) { 3186 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3187 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3188 } 3189 } 3190 } 3191 } 3192 pcbddc->local_primal_size = total_counts; 3193 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3194 3195 /* map constraints_idxs in boundary numbering */ 3196 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3197 if (i != constraints_idxs_ptr[total_counts_cc]) { 3198 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3199 } 3200 3201 /* Create constraint matrix */ 3202 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3203 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3204 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3205 3206 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3207 /* determine if a QR strategy is needed for change of basis */ 3208 qr_needed = PETSC_FALSE; 3209 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3210 total_primal_vertices=0; 3211 pcbddc->local_primal_size_cc = 0; 3212 for (i=0;i<total_counts_cc;i++) { 3213 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3214 if (size_of_constraint == 1) { 3215 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3216 pcbddc->local_primal_size_cc += 1; 3217 } else if (PetscBTLookup(change_basis,i)) { 3218 for (k=0;k<constraints_n[i];k++) { 3219 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3220 } 3221 pcbddc->local_primal_size_cc += constraints_n[i]; 3222 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3223 PetscBTSet(qr_needed_idx,i); 3224 qr_needed = PETSC_TRUE; 3225 } 3226 } else { 3227 pcbddc->local_primal_size_cc += 1; 3228 } 3229 } 3230 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3231 pcbddc->n_vertices = total_primal_vertices; 3232 /* permute indices in order to have a sorted set of vertices */ 3233 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3234 3235 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3236 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3237 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3238 3239 /* nonzero structure of constraint matrix */ 3240 /* and get reference dof for local constraints */ 3241 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3242 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3243 3244 j = total_primal_vertices; 3245 total_counts = total_primal_vertices; 3246 cum = total_primal_vertices; 3247 for (i=n_vertices;i<total_counts_cc;i++) { 3248 if (!PetscBTLookup(change_basis,i)) { 3249 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3250 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3251 cum++; 3252 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3253 for (k=0;k<constraints_n[i];k++) { 3254 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3255 nnz[j+k] = size_of_constraint; 3256 } 3257 j += constraints_n[i]; 3258 } 3259 } 3260 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3261 ierr = PetscFree(nnz);CHKERRQ(ierr); 3262 3263 /* set values in constraint matrix */ 3264 for (i=0;i<total_primal_vertices;i++) { 3265 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3266 } 3267 total_counts = total_primal_vertices; 3268 for (i=n_vertices;i<total_counts_cc;i++) { 3269 if (!PetscBTLookup(change_basis,i)) { 3270 PetscInt *cols; 3271 3272 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3273 cols = constraints_idxs+constraints_idxs_ptr[i]; 3274 for (k=0;k<constraints_n[i];k++) { 3275 PetscInt row = total_counts+k; 3276 PetscScalar *vals; 3277 3278 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3279 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3280 } 3281 total_counts += constraints_n[i]; 3282 } 3283 } 3284 /* assembling */ 3285 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3286 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 3288 /* 3289 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3290 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3291 */ 3292 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3293 if (pcbddc->use_change_of_basis) { 3294 /* dual and primal dofs on a single cc */ 3295 PetscInt dual_dofs,primal_dofs; 3296 /* working stuff for GEQRF */ 3297 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3298 PetscBLASInt lqr_work; 3299 /* working stuff for UNGQR */ 3300 PetscScalar *gqr_work,lgqr_work_t; 3301 PetscBLASInt lgqr_work; 3302 /* working stuff for TRTRS */ 3303 PetscScalar *trs_rhs; 3304 PetscBLASInt Blas_NRHS; 3305 /* pointers for values insertion into change of basis matrix */ 3306 PetscInt *start_rows,*start_cols; 3307 PetscScalar *start_vals; 3308 /* working stuff for values insertion */ 3309 PetscBT is_primal; 3310 PetscInt *aux_primal_numbering_B; 3311 /* matrix sizes */ 3312 PetscInt global_size,local_size; 3313 /* temporary change of basis */ 3314 Mat localChangeOfBasisMatrix; 3315 /* extra space for debugging */ 3316 PetscScalar *dbg_work; 3317 3318 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3319 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3320 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3321 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3322 /* nonzeros for local mat */ 3323 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3324 for (i=0;i<pcis->n;i++) nnz[i]=1; 3325 for (i=n_vertices;i<total_counts_cc;i++) { 3326 if (PetscBTLookup(change_basis,i)) { 3327 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3328 if (PetscBTLookup(qr_needed_idx,i)) { 3329 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3330 } else { 3331 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3332 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3333 } 3334 } 3335 } 3336 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3337 ierr = PetscFree(nnz);CHKERRQ(ierr); 3338 /* Set initial identity in the matrix */ 3339 for (i=0;i<pcis->n;i++) { 3340 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3341 } 3342 3343 if (pcbddc->dbg_flag) { 3344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3346 } 3347 3348 3349 /* Now we loop on the constraints which need a change of basis */ 3350 /* 3351 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3352 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3353 3354 Basic blocks of change of basis matrix T computed by 3355 3356 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3357 3358 | 1 0 ... 0 s_1/S | 3359 | 0 1 ... 0 s_2/S | 3360 | ... | 3361 | 0 ... 1 s_{n-1}/S | 3362 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3363 3364 with S = \sum_{i=1}^n s_i^2 3365 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3366 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3367 3368 - QR decomposition of constraints otherwise 3369 */ 3370 if (qr_needed) { 3371 /* space to store Q */ 3372 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3373 /* first we issue queries for optimal work */ 3374 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3375 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3376 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3377 lqr_work = -1; 3378 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3379 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3380 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3381 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3382 lgqr_work = -1; 3383 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3384 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3385 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3386 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3387 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3388 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3389 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3390 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3391 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3392 /* array to store scaling factors for reflectors */ 3393 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3394 /* array to store rhs and solution of triangular solver */ 3395 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3396 /* allocating workspace for check */ 3397 if (pcbddc->dbg_flag) { 3398 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3399 } 3400 } 3401 /* array to store whether a node is primal or not */ 3402 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3403 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3404 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3405 if (i != total_primal_vertices) { 3406 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3407 } 3408 for (i=0;i<total_primal_vertices;i++) { 3409 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3410 } 3411 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3412 3413 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3414 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3415 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3416 if (PetscBTLookup(change_basis,total_counts)) { 3417 /* get constraint info */ 3418 primal_dofs = constraints_n[total_counts]; 3419 dual_dofs = size_of_constraint-primal_dofs; 3420 3421 if (pcbddc->dbg_flag) { 3422 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3423 } 3424 3425 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3426 3427 /* copy quadrature constraints for change of basis check */ 3428 if (pcbddc->dbg_flag) { 3429 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3430 } 3431 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3432 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3433 3434 /* compute QR decomposition of constraints */ 3435 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3436 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3437 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3438 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3439 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3440 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3441 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3442 3443 /* explictly compute R^-T */ 3444 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3445 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3446 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3447 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3448 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3449 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3450 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3451 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3452 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3453 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3454 3455 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3456 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3457 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3458 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3459 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3460 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3461 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3462 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3463 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3464 3465 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3466 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3467 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3468 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3469 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3470 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3471 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3472 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3473 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3474 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3475 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3476 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3477 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3478 3479 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3480 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3481 /* insert cols for primal dofs */ 3482 for (j=0;j<primal_dofs;j++) { 3483 start_vals = &qr_basis[j*size_of_constraint]; 3484 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3485 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3486 } 3487 /* insert cols for dual dofs */ 3488 for (j=0,k=0;j<dual_dofs;k++) { 3489 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3490 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3491 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3492 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3493 j++; 3494 } 3495 } 3496 3497 /* check change of basis */ 3498 if (pcbddc->dbg_flag) { 3499 PetscInt ii,jj; 3500 PetscBool valid_qr=PETSC_TRUE; 3501 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3502 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3506 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3508 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3509 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3510 for (jj=0;jj<size_of_constraint;jj++) { 3511 for (ii=0;ii<primal_dofs;ii++) { 3512 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3513 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3514 } 3515 } 3516 if (!valid_qr) { 3517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3518 for (jj=0;jj<size_of_constraint;jj++) { 3519 for (ii=0;ii<primal_dofs;ii++) { 3520 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3521 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3522 } 3523 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3524 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3525 } 3526 } 3527 } 3528 } else { 3529 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3530 } 3531 } 3532 } else { /* simple transformation block */ 3533 PetscInt row,col; 3534 PetscScalar val,norm; 3535 3536 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3537 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3538 for (j=0;j<size_of_constraint;j++) { 3539 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3540 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3541 if (!PetscBTLookup(is_primal,row_B)) { 3542 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3543 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3544 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3545 } else { 3546 for (k=0;k<size_of_constraint;k++) { 3547 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3548 if (row != col) { 3549 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3550 } else { 3551 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3552 } 3553 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3554 } 3555 } 3556 } 3557 if (pcbddc->dbg_flag) { 3558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3559 } 3560 } 3561 } else { 3562 if (pcbddc->dbg_flag) { 3563 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3564 } 3565 } 3566 } 3567 3568 /* free workspace */ 3569 if (qr_needed) { 3570 if (pcbddc->dbg_flag) { 3571 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3572 } 3573 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3574 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3575 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3576 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3577 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3578 } 3579 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3580 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3581 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3582 3583 /* assembling of global change of variable */ 3584 { 3585 Mat tmat; 3586 PetscInt bs; 3587 3588 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3589 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3590 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3591 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3592 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3593 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3594 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3595 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3596 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3597 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3598 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3599 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3600 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3601 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3602 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3603 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3604 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3605 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3606 } 3607 /* check */ 3608 if (pcbddc->dbg_flag) { 3609 PetscReal error; 3610 Vec x,x_change; 3611 3612 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3613 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3614 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3615 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3616 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3617 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3618 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3619 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3620 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3621 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3622 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3623 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3624 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3625 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3626 ierr = VecDestroy(&x);CHKERRQ(ierr); 3627 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3628 } 3629 3630 /* adapt sub_schurs computed (if any) */ 3631 if (pcbddc->use_deluxe_scaling) { 3632 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3633 if (sub_schurs->S_Ej_all) { 3634 Mat S_new,tmat; 3635 ISLocalToGlobalMapping NtoSall; 3636 IS is_all_N,is_V,is_V_Sall; 3637 const PetscScalar *array; 3638 const PetscInt *idxs_V,*idxs_all; 3639 PetscInt i,n_V; 3640 3641 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3642 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3643 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3644 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3645 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3646 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3647 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3648 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3649 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3650 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3651 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3652 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3653 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3654 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3655 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3656 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3657 for (i=0;i<n_V;i++) { 3658 PetscScalar val; 3659 PetscInt idx; 3660 3661 idx = idxs_V[i]; 3662 val = array[idxs_all[idxs_V[i]]]; 3663 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3664 } 3665 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3666 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3667 sub_schurs->S_Ej_all = S_new; 3668 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3669 if (sub_schurs->sum_S_Ej_all) { 3670 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3671 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3672 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3673 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3674 sub_schurs->sum_S_Ej_all = S_new; 3675 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3676 } 3677 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3678 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3679 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3680 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3681 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3682 } 3683 } 3684 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3685 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3686 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3687 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3688 } 3689 3690 /* set up change of basis context */ 3691 if (pcbddc->ChangeOfBasisMatrix) { 3692 PCBDDCChange_ctx change_ctx; 3693 3694 if (!pcbddc->new_global_mat) { 3695 PetscInt global_size,local_size; 3696 3697 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3698 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3699 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3700 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3701 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3702 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3703 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3704 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3705 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3706 } else { 3707 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3708 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3709 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3710 } 3711 if (!pcbddc->user_ChangeOfBasisMatrix) { 3712 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3713 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3714 } else { 3715 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3716 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3717 } 3718 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3719 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3720 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3721 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3722 } else { 3723 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 3724 } 3725 3726 /* add pressure dofs to set of primal nodes for numbering purposes */ 3727 for (i=0;i<pcbddc->benign_n;i++) { 3728 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3729 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3730 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3731 pcbddc->local_primal_size_cc++; 3732 pcbddc->local_primal_size++; 3733 } 3734 3735 /* check if a new primal space has been introduced (also take into account benign trick) */ 3736 pcbddc->new_primal_space_local = PETSC_TRUE; 3737 if (olocal_primal_size == pcbddc->local_primal_size) { 3738 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3739 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3740 if (!pcbddc->new_primal_space_local) { 3741 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3742 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3743 } 3744 } 3745 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3746 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3747 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3748 3749 /* flush dbg viewer */ 3750 if (pcbddc->dbg_flag) { 3751 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3752 } 3753 3754 /* free workspace */ 3755 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3756 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3757 if (!pcbddc->adaptive_selection) { 3758 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3759 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3760 } else { 3761 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3762 pcbddc->adaptive_constraints_idxs_ptr, 3763 pcbddc->adaptive_constraints_data_ptr, 3764 pcbddc->adaptive_constraints_idxs, 3765 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3766 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3767 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3768 } 3769 PetscFunctionReturn(0); 3770 } 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3774 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3775 { 3776 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3777 PC_IS *pcis = (PC_IS*)pc->data; 3778 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3779 PetscInt ierr,i,vertex_size,N; 3780 PetscViewer viewer=pcbddc->dbg_viewer; 3781 3782 PetscFunctionBegin; 3783 /* Reset previously computed graph */ 3784 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3785 /* Init local Graph struct */ 3786 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3787 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3788 3789 /* Check validity of the csr graph passed in by the user */ 3790 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3791 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3792 } 3793 3794 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3795 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3796 PetscInt *xadj,*adjncy; 3797 PetscInt nvtxs; 3798 PetscBool flg_row=PETSC_FALSE; 3799 3800 if (pcbddc->use_local_adj) { 3801 3802 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3803 if (flg_row) { 3804 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3805 pcbddc->computed_rowadj = PETSC_TRUE; 3806 } 3807 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3808 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3809 IS is_dummy; 3810 ISLocalToGlobalMapping l2gmap_dummy; 3811 PetscInt j,sum; 3812 PetscInt *cxadj,*cadjncy; 3813 const PetscInt *idxs; 3814 PCBDDCGraph graph; 3815 PetscBT is_on_boundary; 3816 3817 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3818 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3819 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3820 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3821 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3822 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3823 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3824 if (flg_row) { 3825 graph->xadj = xadj; 3826 graph->adjncy = adjncy; 3827 } 3828 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3829 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3830 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3831 3832 if (pcbddc->dbg_flag) { 3833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3834 for (i=0;i<graph->ncc;i++) { 3835 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3836 } 3837 } 3838 3839 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3840 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3841 for (i=0;i<pcis->n_B;i++) { 3842 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3843 } 3844 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3845 3846 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3847 sum = 0; 3848 for (i=0;i<graph->ncc;i++) { 3849 PetscInt sizecc = 0; 3850 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3851 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3852 sizecc++; 3853 } 3854 } 3855 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3856 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3857 cxadj[graph->queue[j]] = sizecc; 3858 } 3859 } 3860 sum += sizecc*sizecc; 3861 } 3862 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3863 sum = 0; 3864 for (i=0;i<pcis->n;i++) { 3865 PetscInt temp = cxadj[i]; 3866 cxadj[i] = sum; 3867 sum += temp; 3868 } 3869 cxadj[pcis->n] = sum; 3870 for (i=0;i<graph->ncc;i++) { 3871 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3872 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3873 PetscInt k,sizecc = 0; 3874 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3875 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3876 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3877 sizecc++; 3878 } 3879 } 3880 } 3881 } 3882 } 3883 if (sum) { 3884 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3885 } else { 3886 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3887 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3888 } 3889 graph->xadj = 0; 3890 graph->adjncy = 0; 3891 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3892 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3893 } 3894 } 3895 if (pcbddc->dbg_flag) { 3896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3897 } 3898 3899 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3900 vertex_size = 1; 3901 if (pcbddc->user_provided_isfordofs) { 3902 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3903 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3904 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3905 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3906 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3907 } 3908 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3909 pcbddc->n_ISForDofs = 0; 3910 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3911 } 3912 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3913 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3914 } else { 3915 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3916 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3917 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3918 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3919 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3920 } 3921 } 3922 } 3923 3924 /* Setup of Graph */ 3925 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3926 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3927 } 3928 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3929 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3930 } 3931 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 3932 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3933 } 3934 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3935 3936 /* attach info on disconnected subdomains if present */ 3937 if (pcbddc->n_local_subs) { 3938 PetscInt *local_subs; 3939 3940 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3941 for (i=0;i<pcbddc->n_local_subs;i++) { 3942 const PetscInt *idxs; 3943 PetscInt nl,j; 3944 3945 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3946 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3947 for (j=0;j<nl;j++) { 3948 local_subs[idxs[j]] = i; 3949 } 3950 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3951 } 3952 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3953 pcbddc->mat_graph->local_subs = local_subs; 3954 } 3955 3956 /* Graph's connected components analysis */ 3957 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3958 3959 /* print some info to stdout */ 3960 if (pcbddc->dbg_flag) { 3961 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3962 } 3963 3964 /* mark topography has done */ 3965 pcbddc->recompute_topography = PETSC_FALSE; 3966 PetscFunctionReturn(0); 3967 } 3968 3969 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3970 #undef __FUNCT__ 3971 #define __FUNCT__ "PCBDDCSubsetNumbering" 3972 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3973 { 3974 PetscSF sf; 3975 PetscLayout map; 3976 const PetscInt *idxs; 3977 PetscInt *leaf_data,*root_data,*gidxs; 3978 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3979 PetscInt n_n,nlocals,start,first_index; 3980 PetscMPIInt commsize; 3981 PetscBool first_found; 3982 PetscErrorCode ierr; 3983 3984 PetscFunctionBegin; 3985 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3986 if (subset_mult) { 3987 PetscCheckSameComm(subset,1,subset_mult,2); 3988 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3989 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3990 } 3991 /* create workspace layout for computing global indices of subset */ 3992 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3993 lbounds[0] = lbounds[1] = 0; 3994 for (i=0;i<n;i++) { 3995 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3996 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3997 } 3998 lbounds[0] = -lbounds[0]; 3999 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4000 gbounds[0] = -gbounds[0]; 4001 N = gbounds[1] - gbounds[0] + 1; 4002 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4003 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4004 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4005 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4006 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4007 4008 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4009 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4010 if (subset_mult) { 4011 const PetscInt* idxs_mult; 4012 4013 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4014 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4015 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4016 } else { 4017 for (i=0;i<n;i++) leaf_data[i] = 1; 4018 } 4019 /* local size of new subset */ 4020 n_n = 0; 4021 for (i=0;i<n;i++) n_n += leaf_data[i]; 4022 4023 /* global indexes in layout */ 4024 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4025 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4026 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4028 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4029 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4030 4031 /* reduce from leaves to roots */ 4032 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4033 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4034 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4035 4036 /* count indexes in local part of layout */ 4037 nlocals = 0; 4038 first_index = -1; 4039 first_found = PETSC_FALSE; 4040 for (i=0;i<Nl;i++) { 4041 if (!first_found && root_data[i]) { 4042 first_found = PETSC_TRUE; 4043 first_index = i; 4044 } 4045 nlocals += root_data[i]; 4046 } 4047 4048 /* cumulative of number of indexes and size of subset without holes */ 4049 #if defined(PETSC_HAVE_MPI_EXSCAN) 4050 start = 0; 4051 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4052 #else 4053 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4054 start = start-nlocals; 4055 #endif 4056 4057 if (N_n) { /* compute total size of new subset if requested */ 4058 *N_n = start + nlocals; 4059 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4060 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4061 } 4062 4063 /* adapt root data with cumulative */ 4064 if (first_found) { 4065 PetscInt old_index; 4066 4067 root_data[first_index] += start; 4068 old_index = first_index; 4069 for (i=first_index+1;i<Nl;i++) { 4070 if (root_data[i]) { 4071 root_data[i] += root_data[old_index]; 4072 old_index = i; 4073 } 4074 } 4075 } 4076 4077 /* from roots to leaves */ 4078 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4079 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4080 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4081 4082 /* create new IS with global indexes without holes */ 4083 if (subset_mult) { 4084 const PetscInt* idxs_mult; 4085 PetscInt cum; 4086 4087 cum = 0; 4088 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4089 for (i=0;i<n;i++) { 4090 PetscInt j; 4091 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4092 } 4093 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4094 } else { 4095 for (i=0;i<n;i++) { 4096 gidxs[i] = leaf_data[i]-1; 4097 } 4098 } 4099 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4100 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4101 PetscFunctionReturn(0); 4102 } 4103 4104 #undef __FUNCT__ 4105 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4106 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4107 { 4108 PetscInt i,j; 4109 PetscScalar *alphas; 4110 PetscErrorCode ierr; 4111 4112 PetscFunctionBegin; 4113 /* this implements stabilized Gram-Schmidt */ 4114 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4115 for (i=0;i<n;i++) { 4116 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4117 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4118 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4119 } 4120 ierr = PetscFree(alphas);CHKERRQ(ierr); 4121 PetscFunctionReturn(0); 4122 } 4123 4124 #undef __FUNCT__ 4125 #define __FUNCT__ "MatISGetSubassemblingPattern" 4126 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4127 { 4128 IS ranks_send_to; 4129 PetscInt n_neighs,*neighs,*n_shared,**shared; 4130 PetscMPIInt size,rank,color; 4131 PetscInt *xadj,*adjncy; 4132 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4133 PetscInt i,local_size,threshold=0; 4134 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4135 PetscSubcomm subcomm; 4136 PetscErrorCode ierr; 4137 4138 PetscFunctionBegin; 4139 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4140 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4141 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4142 4143 /* Get info on mapping */ 4144 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4145 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4146 4147 /* build local CSR graph of subdomains' connectivity */ 4148 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4149 xadj[0] = 0; 4150 xadj[1] = PetscMax(n_neighs-1,0); 4151 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4152 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4153 4154 if (threshold) { 4155 PetscInt xadj_count = 0; 4156 for (i=1;i<n_neighs;i++) { 4157 if (n_shared[i] > threshold) { 4158 adjncy[xadj_count] = neighs[i]; 4159 adjncy_wgt[xadj_count] = n_shared[i]; 4160 xadj_count++; 4161 } 4162 } 4163 xadj[1] = xadj_count; 4164 } else { 4165 if (xadj[1]) { 4166 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4167 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4168 } 4169 } 4170 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4171 if (use_square) { 4172 for (i=0;i<xadj[1];i++) { 4173 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4174 } 4175 } 4176 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4177 4178 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4179 4180 /* 4181 Restrict work on active processes only. 4182 */ 4183 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4184 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4185 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4186 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4187 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4188 if (color) { 4189 ierr = PetscFree(xadj);CHKERRQ(ierr); 4190 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4191 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4192 } else { 4193 Mat subdomain_adj; 4194 IS new_ranks,new_ranks_contig; 4195 MatPartitioning partitioner; 4196 PetscInt prank,rstart=0,rend=0; 4197 PetscInt *is_indices,*oldranks; 4198 PetscBool aggregate; 4199 4200 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4201 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4202 prank = rank; 4203 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4204 /* 4205 for (i=0;i<size;i++) { 4206 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4207 } 4208 */ 4209 for (i=0;i<xadj[1];i++) { 4210 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4211 } 4212 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4213 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4214 if (aggregate) { 4215 PetscInt lrows,row,ncols,*cols; 4216 PetscMPIInt nrank; 4217 PetscScalar *vals; 4218 4219 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4220 lrows = 0; 4221 if (nrank<redprocs) { 4222 lrows = size/redprocs; 4223 if (nrank<size%redprocs) lrows++; 4224 } 4225 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4226 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4227 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4228 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4229 row = nrank; 4230 ncols = xadj[1]-xadj[0]; 4231 cols = adjncy; 4232 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4233 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4234 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4235 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4236 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4237 ierr = PetscFree(xadj);CHKERRQ(ierr); 4238 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4239 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4240 ierr = PetscFree(vals);CHKERRQ(ierr); 4241 } else { 4242 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4243 } 4244 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4245 4246 /* Partition */ 4247 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4248 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4249 if (use_vwgt) { 4250 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4251 v_wgt[0] = local_size; 4252 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4253 } 4254 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4255 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4256 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4257 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4258 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4259 4260 /* renumber new_ranks to avoid "holes" in new set of processors */ 4261 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4262 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4263 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4264 if (!redprocs) { 4265 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4266 } else { 4267 PetscInt idxs[1]; 4268 PetscMPIInt tag; 4269 MPI_Request *reqs; 4270 4271 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4272 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4273 for (i=rstart;i<rend;i++) { 4274 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4275 } 4276 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4277 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4278 ierr = PetscFree(reqs);CHKERRQ(ierr); 4279 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4280 } 4281 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4282 /* clean up */ 4283 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4284 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4285 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4286 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4287 } 4288 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4289 4290 /* assemble parallel IS for sends */ 4291 i = 1; 4292 if (color) i=0; 4293 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4294 /* get back IS */ 4295 *is_sends = ranks_send_to; 4296 PetscFunctionReturn(0); 4297 } 4298 4299 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4300 4301 #undef __FUNCT__ 4302 #define __FUNCT__ "MatISSubassemble" 4303 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4304 { 4305 Mat local_mat; 4306 IS is_sends_internal; 4307 PetscInt rows,cols,new_local_rows; 4308 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4309 PetscBool ismatis,isdense,newisdense,destroy_mat; 4310 ISLocalToGlobalMapping l2gmap; 4311 PetscInt* l2gmap_indices; 4312 const PetscInt* is_indices; 4313 MatType new_local_type; 4314 /* buffers */ 4315 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4316 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4317 PetscInt *recv_buffer_idxs_local; 4318 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4319 /* MPI */ 4320 MPI_Comm comm,comm_n; 4321 PetscSubcomm subcomm; 4322 PetscMPIInt n_sends,n_recvs,commsize; 4323 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4324 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4325 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4326 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4327 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4328 PetscErrorCode ierr; 4329 4330 PetscFunctionBegin; 4331 /* TODO: add missing checks */ 4332 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4333 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4334 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4335 PetscValidLogicalCollectiveInt(mat,nis,7); 4336 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4337 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4338 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4339 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4340 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4341 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4342 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4343 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4344 PetscInt mrows,mcols,mnrows,mncols; 4345 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4346 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4347 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4348 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4349 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4350 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4351 } 4352 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4353 PetscValidLogicalCollectiveInt(mat,bs,0); 4354 /* prepare IS for sending if not provided */ 4355 if (!is_sends) { 4356 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4357 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4358 } else { 4359 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4360 is_sends_internal = is_sends; 4361 } 4362 4363 /* get comm */ 4364 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4365 4366 /* compute number of sends */ 4367 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4368 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4369 4370 /* compute number of receives */ 4371 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4372 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4373 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4374 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4375 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4376 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4377 ierr = PetscFree(iflags);CHKERRQ(ierr); 4378 4379 /* restrict comm if requested */ 4380 subcomm = 0; 4381 destroy_mat = PETSC_FALSE; 4382 if (restrict_comm) { 4383 PetscMPIInt color,subcommsize; 4384 4385 color = 0; 4386 if (restrict_full) { 4387 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4388 } else { 4389 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4390 } 4391 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4392 subcommsize = commsize - subcommsize; 4393 /* check if reuse has been requested */ 4394 if (reuse == MAT_REUSE_MATRIX) { 4395 if (*mat_n) { 4396 PetscMPIInt subcommsize2; 4397 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4398 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4399 comm_n = PetscObjectComm((PetscObject)*mat_n); 4400 } else { 4401 comm_n = PETSC_COMM_SELF; 4402 } 4403 } else { /* MAT_INITIAL_MATRIX */ 4404 PetscMPIInt rank; 4405 4406 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4407 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4408 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4409 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4410 comm_n = PetscSubcommChild(subcomm); 4411 } 4412 /* flag to destroy *mat_n if not significative */ 4413 if (color) destroy_mat = PETSC_TRUE; 4414 } else { 4415 comm_n = comm; 4416 } 4417 4418 /* prepare send/receive buffers */ 4419 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4420 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4421 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4422 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4423 if (nis) { 4424 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4425 } 4426 4427 /* Get data from local matrices */ 4428 if (!isdense) { 4429 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4430 /* TODO: See below some guidelines on how to prepare the local buffers */ 4431 /* 4432 send_buffer_vals should contain the raw values of the local matrix 4433 send_buffer_idxs should contain: 4434 - MatType_PRIVATE type 4435 - PetscInt size_of_l2gmap 4436 - PetscInt global_row_indices[size_of_l2gmap] 4437 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4438 */ 4439 } else { 4440 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4441 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4442 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4443 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4444 send_buffer_idxs[1] = i; 4445 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4446 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4447 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4448 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4449 for (i=0;i<n_sends;i++) { 4450 ilengths_vals[is_indices[i]] = len*len; 4451 ilengths_idxs[is_indices[i]] = len+2; 4452 } 4453 } 4454 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4455 /* additional is (if any) */ 4456 if (nis) { 4457 PetscMPIInt psum; 4458 PetscInt j; 4459 for (j=0,psum=0;j<nis;j++) { 4460 PetscInt plen; 4461 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4462 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4463 psum += len+1; /* indices + lenght */ 4464 } 4465 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4466 for (j=0,psum=0;j<nis;j++) { 4467 PetscInt plen; 4468 const PetscInt *is_array_idxs; 4469 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4470 send_buffer_idxs_is[psum] = plen; 4471 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4472 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4473 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4474 psum += plen+1; /* indices + lenght */ 4475 } 4476 for (i=0;i<n_sends;i++) { 4477 ilengths_idxs_is[is_indices[i]] = psum; 4478 } 4479 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4480 } 4481 4482 buf_size_idxs = 0; 4483 buf_size_vals = 0; 4484 buf_size_idxs_is = 0; 4485 for (i=0;i<n_recvs;i++) { 4486 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4487 buf_size_vals += (PetscInt)olengths_vals[i]; 4488 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4489 } 4490 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4491 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4492 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4493 4494 /* get new tags for clean communications */ 4495 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4496 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4497 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4498 4499 /* allocate for requests */ 4500 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4501 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4502 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4503 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4504 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4505 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4506 4507 /* communications */ 4508 ptr_idxs = recv_buffer_idxs; 4509 ptr_vals = recv_buffer_vals; 4510 ptr_idxs_is = recv_buffer_idxs_is; 4511 for (i=0;i<n_recvs;i++) { 4512 source_dest = onodes[i]; 4513 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4514 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4515 ptr_idxs += olengths_idxs[i]; 4516 ptr_vals += olengths_vals[i]; 4517 if (nis) { 4518 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4519 ptr_idxs_is += olengths_idxs_is[i]; 4520 } 4521 } 4522 for (i=0;i<n_sends;i++) { 4523 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4524 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4525 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4526 if (nis) { 4527 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4528 } 4529 } 4530 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4531 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4532 4533 /* assemble new l2g map */ 4534 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4535 ptr_idxs = recv_buffer_idxs; 4536 new_local_rows = 0; 4537 for (i=0;i<n_recvs;i++) { 4538 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4539 ptr_idxs += olengths_idxs[i]; 4540 } 4541 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4542 ptr_idxs = recv_buffer_idxs; 4543 new_local_rows = 0; 4544 for (i=0;i<n_recvs;i++) { 4545 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4546 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4547 ptr_idxs += olengths_idxs[i]; 4548 } 4549 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4550 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4551 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4552 4553 /* infer new local matrix type from received local matrices type */ 4554 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4555 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4556 if (n_recvs) { 4557 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4558 ptr_idxs = recv_buffer_idxs; 4559 for (i=0;i<n_recvs;i++) { 4560 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4561 new_local_type_private = MATAIJ_PRIVATE; 4562 break; 4563 } 4564 ptr_idxs += olengths_idxs[i]; 4565 } 4566 switch (new_local_type_private) { 4567 case MATDENSE_PRIVATE: 4568 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4569 new_local_type = MATSEQAIJ; 4570 bs = 1; 4571 } else { /* if I receive only 1 dense matrix */ 4572 new_local_type = MATSEQDENSE; 4573 bs = 1; 4574 } 4575 break; 4576 case MATAIJ_PRIVATE: 4577 new_local_type = MATSEQAIJ; 4578 bs = 1; 4579 break; 4580 case MATBAIJ_PRIVATE: 4581 new_local_type = MATSEQBAIJ; 4582 break; 4583 case MATSBAIJ_PRIVATE: 4584 new_local_type = MATSEQSBAIJ; 4585 break; 4586 default: 4587 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4588 break; 4589 } 4590 } else { /* by default, new_local_type is seqdense */ 4591 new_local_type = MATSEQDENSE; 4592 bs = 1; 4593 } 4594 4595 /* create MATIS object if needed */ 4596 if (reuse == MAT_INITIAL_MATRIX) { 4597 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4598 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4599 } else { 4600 /* it also destroys the local matrices */ 4601 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4602 } 4603 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4604 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4605 4606 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4607 4608 /* Global to local map of received indices */ 4609 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4610 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4611 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4612 4613 /* restore attributes -> type of incoming data and its size */ 4614 buf_size_idxs = 0; 4615 for (i=0;i<n_recvs;i++) { 4616 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4617 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4618 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4619 } 4620 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4621 4622 /* set preallocation */ 4623 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4624 if (!newisdense) { 4625 PetscInt *new_local_nnz=0; 4626 4627 ptr_vals = recv_buffer_vals; 4628 ptr_idxs = recv_buffer_idxs_local; 4629 if (n_recvs) { 4630 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4631 } 4632 for (i=0;i<n_recvs;i++) { 4633 PetscInt j; 4634 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4635 for (j=0;j<*(ptr_idxs+1);j++) { 4636 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4637 } 4638 } else { 4639 /* TODO */ 4640 } 4641 ptr_idxs += olengths_idxs[i]; 4642 } 4643 if (new_local_nnz) { 4644 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4645 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4646 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4647 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4648 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4649 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4650 } else { 4651 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4652 } 4653 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4654 } else { 4655 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4656 } 4657 4658 /* set values */ 4659 ptr_vals = recv_buffer_vals; 4660 ptr_idxs = recv_buffer_idxs_local; 4661 for (i=0;i<n_recvs;i++) { 4662 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4663 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4664 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4665 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4666 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4667 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4668 } else { 4669 /* TODO */ 4670 } 4671 ptr_idxs += olengths_idxs[i]; 4672 ptr_vals += olengths_vals[i]; 4673 } 4674 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4676 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4677 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4678 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4679 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4680 4681 #if 0 4682 if (!restrict_comm) { /* check */ 4683 Vec lvec,rvec; 4684 PetscReal infty_error; 4685 4686 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4687 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4688 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4689 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4690 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4691 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4692 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4693 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4694 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4695 } 4696 #endif 4697 4698 /* assemble new additional is (if any) */ 4699 if (nis) { 4700 PetscInt **temp_idxs,*count_is,j,psum; 4701 4702 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4703 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4704 ptr_idxs = recv_buffer_idxs_is; 4705 psum = 0; 4706 for (i=0;i<n_recvs;i++) { 4707 for (j=0;j<nis;j++) { 4708 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4709 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4710 psum += plen; 4711 ptr_idxs += plen+1; /* shift pointer to received data */ 4712 } 4713 } 4714 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4715 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4716 for (i=1;i<nis;i++) { 4717 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4718 } 4719 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4720 ptr_idxs = recv_buffer_idxs_is; 4721 for (i=0;i<n_recvs;i++) { 4722 for (j=0;j<nis;j++) { 4723 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4724 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4725 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4726 ptr_idxs += plen+1; /* shift pointer to received data */ 4727 } 4728 } 4729 for (i=0;i<nis;i++) { 4730 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4731 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4732 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4733 } 4734 ierr = PetscFree(count_is);CHKERRQ(ierr); 4735 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4736 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4737 } 4738 /* free workspace */ 4739 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4740 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4741 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4742 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4743 if (isdense) { 4744 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4745 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4746 } else { 4747 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4748 } 4749 if (nis) { 4750 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4751 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4752 } 4753 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4754 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4755 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4756 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4757 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4758 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4759 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4760 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4761 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4762 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4763 ierr = PetscFree(onodes);CHKERRQ(ierr); 4764 if (nis) { 4765 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4766 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4767 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4768 } 4769 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4770 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4771 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4772 for (i=0;i<nis;i++) { 4773 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4774 } 4775 *mat_n = NULL; 4776 } 4777 PetscFunctionReturn(0); 4778 } 4779 4780 /* temporary hack into ksp private data structure */ 4781 #include <petsc/private/kspimpl.h> 4782 4783 #undef __FUNCT__ 4784 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4785 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4786 { 4787 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4788 PC_IS *pcis = (PC_IS*)pc->data; 4789 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4790 MatNullSpace CoarseNullSpace=NULL; 4791 ISLocalToGlobalMapping coarse_islg; 4792 IS coarse_is,*isarray; 4793 PetscInt i,im_active=-1,active_procs=-1; 4794 PetscInt nis,nisdofs,nisneu,nisvert; 4795 PC pc_temp; 4796 PCType coarse_pc_type; 4797 KSPType coarse_ksp_type; 4798 PetscBool multilevel_requested,multilevel_allowed; 4799 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4800 Mat t_coarse_mat_is; 4801 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4802 PetscMPIInt all_procs; 4803 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4804 PetscBool compute_vecs = PETSC_FALSE; 4805 PetscScalar *array; 4806 PetscErrorCode ierr; 4807 4808 PetscFunctionBegin; 4809 /* Assign global numbering to coarse dofs */ 4810 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4811 PetscInt ocoarse_size; 4812 compute_vecs = PETSC_TRUE; 4813 ocoarse_size = pcbddc->coarse_size; 4814 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4815 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4816 /* see if we can avoid some work */ 4817 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4818 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4819 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4820 PC pc; 4821 PetscBool isbddc; 4822 4823 /* temporary workaround since PCBDDC does not have a reset method so far */ 4824 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4825 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4826 if (isbddc) { 4827 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4828 } 4829 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4830 coarse_reuse = PETSC_FALSE; 4831 } else { /* we can safely reuse already computed coarse matrix */ 4832 coarse_reuse = PETSC_TRUE; 4833 } 4834 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4835 coarse_reuse = PETSC_FALSE; 4836 } 4837 /* reset any subassembling information */ 4838 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4839 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4840 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4841 coarse_reuse = PETSC_TRUE; 4842 } 4843 4844 /* count "active" (i.e. with positive local size) and "void" processes */ 4845 im_active = !!(pcis->n); 4846 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4847 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4848 void_procs = all_procs-active_procs; 4849 csin_type_simple = PETSC_TRUE; 4850 redist = PETSC_FALSE; 4851 if (pcbddc->current_level && void_procs) { 4852 csin_ml = PETSC_TRUE; 4853 ncoarse_ml = void_procs; 4854 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4855 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4856 csin_ds = PETSC_TRUE; 4857 ncoarse_ds = pcbddc->redistribute_coarse; 4858 redist = PETSC_TRUE; 4859 } else { 4860 csin_ds = PETSC_TRUE; 4861 ncoarse_ds = active_procs; 4862 redist = PETSC_TRUE; 4863 } 4864 } else { 4865 csin_ml = PETSC_FALSE; 4866 ncoarse_ml = all_procs; 4867 if (void_procs) { 4868 csin_ds = PETSC_TRUE; 4869 ncoarse_ds = void_procs; 4870 csin_type_simple = PETSC_FALSE; 4871 } else { 4872 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4873 csin_ds = PETSC_TRUE; 4874 ncoarse_ds = pcbddc->redistribute_coarse; 4875 redist = PETSC_TRUE; 4876 } else { 4877 csin_ds = PETSC_FALSE; 4878 ncoarse_ds = all_procs; 4879 } 4880 } 4881 } 4882 4883 /* 4884 test if we can go multilevel: three conditions must be satisfied: 4885 - we have not exceeded the number of levels requested 4886 - we can actually subassemble the active processes 4887 - we can find a suitable number of MPI processes where we can place the subassembled problem 4888 */ 4889 multilevel_allowed = PETSC_FALSE; 4890 multilevel_requested = PETSC_FALSE; 4891 if (pcbddc->current_level < pcbddc->max_levels) { 4892 multilevel_requested = PETSC_TRUE; 4893 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4894 multilevel_allowed = PETSC_FALSE; 4895 } else { 4896 multilevel_allowed = PETSC_TRUE; 4897 } 4898 } 4899 /* determine number of process partecipating to coarse solver */ 4900 if (multilevel_allowed) { 4901 ncoarse = ncoarse_ml; 4902 csin = csin_ml; 4903 redist = PETSC_FALSE; 4904 } else { 4905 ncoarse = ncoarse_ds; 4906 csin = csin_ds; 4907 } 4908 4909 /* creates temporary l2gmap and IS for coarse indexes */ 4910 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4911 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4912 4913 /* creates temporary MATIS object for coarse matrix */ 4914 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4915 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4916 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4917 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4918 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4919 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4920 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4921 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4922 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4923 4924 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4925 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 4926 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4927 const PetscInt *idxs; 4928 ISLocalToGlobalMapping tmap; 4929 4930 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4931 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4932 /* allocate space for temporary storage */ 4933 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4934 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4935 /* allocate for IS array */ 4936 nisdofs = pcbddc->n_ISForDofsLocal; 4937 nisneu = !!pcbddc->NeumannBoundariesLocal; 4938 nisvert = 0; 4939 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4940 nisvert = 1; 4941 } 4942 nis = nisdofs + nisneu + nisvert; 4943 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4944 /* dofs splitting */ 4945 for (i=0;i<nisdofs;i++) { 4946 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4947 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4948 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4949 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4950 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4951 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4952 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4953 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4954 } 4955 /* neumann boundaries */ 4956 if (pcbddc->NeumannBoundariesLocal) { 4957 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4958 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4959 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4960 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4961 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4962 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4963 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4964 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4965 } 4966 /* primal vertices (benign) */ 4967 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4968 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 4969 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4970 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4971 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4972 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4973 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 4974 } 4975 /* free memory */ 4976 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4977 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4978 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4979 } else { 4980 nis = 0; 4981 nisdofs = 0; 4982 nisneu = 0; 4983 nisvert = 0; 4984 isarray = NULL; 4985 } 4986 /* destroy no longer needed map */ 4987 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4988 4989 /* restrict on coarse candidates (if needed) */ 4990 coarse_mat_is = NULL; 4991 if (csin) { 4992 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4993 if (redist) { 4994 PetscMPIInt rank; 4995 PetscInt spc,n_spc_p1,dest[1],destsize; 4996 4997 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4998 spc = active_procs/ncoarse; 4999 n_spc_p1 = active_procs%ncoarse; 5000 if (im_active) { 5001 destsize = 1; 5002 if (rank > n_spc_p1*(spc+1)-1) { 5003 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5004 } else { 5005 dest[0] = rank/(spc+1); 5006 } 5007 } else { 5008 destsize = 0; 5009 } 5010 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5011 } else if (csin_type_simple) { 5012 PetscMPIInt rank; 5013 PetscInt issize,isidx; 5014 5015 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5016 if (im_active) { 5017 issize = 1; 5018 isidx = (PetscInt)rank; 5019 } else { 5020 issize = 0; 5021 isidx = -1; 5022 } 5023 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5024 } else { /* get a suitable subassembling pattern from MATIS code */ 5025 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5026 } 5027 5028 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5029 if (!redist || ncoarse <= void_procs) { 5030 PetscInt ncoarse_cand,tissize,*nisindices; 5031 PetscInt *coarse_candidates; 5032 const PetscInt* tisindices; 5033 5034 /* get coarse candidates' ranks in pc communicator */ 5035 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5036 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5037 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5038 if (!coarse_candidates[i]) { 5039 coarse_candidates[ncoarse_cand++]=i; 5040 } 5041 } 5042 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5043 5044 5045 if (pcbddc->dbg_flag) { 5046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5047 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5048 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5049 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5050 for (i=0;i<ncoarse_cand;i++) { 5051 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5052 } 5053 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5054 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5055 } 5056 /* shift the pattern on coarse candidates */ 5057 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5058 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5059 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5060 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5061 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5062 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5063 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5064 } 5065 if (pcbddc->dbg_flag) { 5066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5067 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5068 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5069 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5070 } 5071 } 5072 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5073 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5074 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5075 } else { /* this is the last level, so use just receiving processes in subcomm */ 5076 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5077 } 5078 } else { 5079 if (pcbddc->dbg_flag) { 5080 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5081 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5082 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5083 } 5084 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5085 coarse_mat_is = t_coarse_mat_is; 5086 } 5087 5088 /* create local to global scatters for coarse problem */ 5089 if (compute_vecs) { 5090 PetscInt lrows; 5091 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5092 if (coarse_mat_is) { 5093 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5094 } else { 5095 lrows = 0; 5096 } 5097 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5098 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5099 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5100 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5101 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5102 } 5103 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5104 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5105 5106 /* set defaults for coarse KSP and PC */ 5107 if (multilevel_allowed) { 5108 coarse_ksp_type = KSPRICHARDSON; 5109 coarse_pc_type = PCBDDC; 5110 } else { 5111 coarse_ksp_type = KSPPREONLY; 5112 coarse_pc_type = PCREDUNDANT; 5113 } 5114 5115 /* print some info if requested */ 5116 if (pcbddc->dbg_flag) { 5117 if (!multilevel_allowed) { 5118 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5119 if (multilevel_requested) { 5120 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5121 } else if (pcbddc->max_levels) { 5122 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5123 } 5124 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5125 } 5126 } 5127 5128 /* create the coarse KSP object only once with defaults */ 5129 if (coarse_mat_is) { 5130 MatReuse coarse_mat_reuse; 5131 PetscViewer dbg_viewer = NULL; 5132 if (pcbddc->dbg_flag) { 5133 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5134 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5135 } 5136 if (!pcbddc->coarse_ksp) { 5137 char prefix[256],str_level[16]; 5138 size_t len; 5139 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5140 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5141 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5142 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5143 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5144 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5145 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5146 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5147 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5148 /* prefix */ 5149 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5150 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5151 if (!pcbddc->current_level) { 5152 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5153 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5154 } else { 5155 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5156 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5157 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5158 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5159 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5160 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5161 } 5162 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5163 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5164 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5165 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5166 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5167 /* allow user customization */ 5168 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5169 } 5170 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5171 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5172 if (nisdofs) { 5173 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5174 for (i=0;i<nisdofs;i++) { 5175 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5176 } 5177 } 5178 if (nisneu) { 5179 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5180 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5181 } 5182 if (nisvert) { 5183 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5184 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5185 } 5186 5187 /* get some info after set from options */ 5188 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5189 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5190 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5191 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5192 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5193 isbddc = PETSC_FALSE; 5194 } 5195 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5196 if (isredundant) { 5197 KSP inner_ksp; 5198 PC inner_pc; 5199 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5200 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5201 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5202 } 5203 5204 /* assemble coarse matrix */ 5205 if (coarse_reuse) { 5206 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5207 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5208 coarse_mat_reuse = MAT_REUSE_MATRIX; 5209 } else { 5210 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5211 } 5212 if (isbddc || isnn) { 5213 if (pcbddc->coarsening_ratio > 1) { 5214 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5215 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5216 if (pcbddc->dbg_flag) { 5217 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5218 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5219 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5220 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5221 } 5222 } 5223 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5224 } else { 5225 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5226 coarse_mat = coarse_mat_is; 5227 } 5228 } else { 5229 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5230 } 5231 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5232 5233 /* propagate symmetry info of coarse matrix */ 5234 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5235 if (pc->pmat->symmetric_set) { 5236 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5237 } 5238 if (pc->pmat->hermitian_set) { 5239 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5240 } 5241 if (pc->pmat->spd_set) { 5242 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5243 } 5244 /* set operators */ 5245 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5246 if (pcbddc->dbg_flag) { 5247 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5248 } 5249 } else { /* processes non partecipating to coarse solver (if any) */ 5250 coarse_mat = 0; 5251 } 5252 ierr = PetscFree(isarray);CHKERRQ(ierr); 5253 #if 0 5254 { 5255 PetscViewer viewer; 5256 char filename[256]; 5257 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5258 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5259 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5260 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5261 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5262 } 5263 #endif 5264 5265 /* Compute coarse null space (special handling by BDDC only) */ 5266 #if 0 5267 if (pcbddc->NullSpace) { 5268 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5269 } 5270 #endif 5271 /* hack */ 5272 if (pcbddc->coarse_ksp) { 5273 Vec crhs,csol; 5274 5275 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5276 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5277 if (!csol) { 5278 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5279 } 5280 if (!crhs) { 5281 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5282 } 5283 } 5284 5285 /* compute null space for coarse solver if the benign trick has been requested */ 5286 if (pcbddc->benign_null) { 5287 5288 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5289 for (i=0;i<pcbddc->benign_n;i++) { 5290 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5291 } 5292 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5293 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5294 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5295 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5296 if (coarse_mat) { 5297 Vec nullv; 5298 PetscScalar *array,*array2; 5299 PetscInt nl; 5300 5301 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5302 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5303 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5304 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5305 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5306 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5307 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5308 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5309 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5310 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5311 } 5312 } 5313 5314 if (pcbddc->coarse_ksp) { 5315 PetscBool ispreonly; 5316 5317 if (CoarseNullSpace) { 5318 PetscBool isnull; 5319 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5320 if (isnull) { 5321 if (isbddc && !pcbddc->benign_saddle_point) { 5322 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5323 } else { 5324 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5325 } 5326 } else { 5327 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5328 } 5329 } 5330 /* setup coarse ksp */ 5331 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5332 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5333 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5334 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5335 KSP check_ksp; 5336 KSPType check_ksp_type; 5337 PC check_pc; 5338 Vec check_vec,coarse_vec; 5339 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5340 PetscInt its; 5341 PetscBool compute_eigs; 5342 PetscReal *eigs_r,*eigs_c; 5343 PetscInt neigs; 5344 const char *prefix; 5345 5346 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5347 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5348 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5349 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5350 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5351 if (ispreonly) { 5352 check_ksp_type = KSPPREONLY; 5353 compute_eigs = PETSC_FALSE; 5354 } else { 5355 check_ksp_type = KSPGMRES; 5356 compute_eigs = PETSC_TRUE; 5357 } 5358 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5359 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5360 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5361 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5362 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5363 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5364 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5365 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5366 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5367 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5368 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5369 /* create random vec */ 5370 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5371 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5372 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5373 if (CoarseNullSpace) { 5374 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5375 } 5376 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5377 /* solve coarse problem */ 5378 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5379 if (CoarseNullSpace) { 5380 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5381 } 5382 /* set eigenvalue estimation if preonly has not been requested */ 5383 if (compute_eigs) { 5384 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5385 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5386 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5387 lambda_max = eigs_r[neigs-1]; 5388 lambda_min = eigs_r[0]; 5389 if (pcbddc->use_coarse_estimates) { 5390 if (lambda_max>lambda_min) { 5391 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5392 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5393 } 5394 } 5395 } 5396 5397 /* check coarse problem residual error */ 5398 if (pcbddc->dbg_flag) { 5399 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5400 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5401 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5402 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5403 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5404 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5405 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5406 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5407 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5408 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5410 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5411 if (CoarseNullSpace) { 5412 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5413 } 5414 if (compute_eigs) { 5415 PetscReal lambda_max_s,lambda_min_s; 5416 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5417 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5418 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5419 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5420 for (i=0;i<neigs;i++) { 5421 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5422 } 5423 } 5424 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5425 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5426 } 5427 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5428 if (compute_eigs) { 5429 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5430 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5431 } 5432 } 5433 } 5434 /* print additional info */ 5435 if (pcbddc->dbg_flag) { 5436 /* waits until all processes reaches this point */ 5437 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5438 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5440 } 5441 5442 /* free memory */ 5443 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5444 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5445 PetscFunctionReturn(0); 5446 } 5447 5448 #undef __FUNCT__ 5449 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5450 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5451 { 5452 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5453 PC_IS* pcis = (PC_IS*)pc->data; 5454 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5455 IS subset,subset_mult,subset_n; 5456 PetscInt local_size,coarse_size=0; 5457 PetscInt *local_primal_indices=NULL; 5458 const PetscInt *t_local_primal_indices; 5459 PetscErrorCode ierr; 5460 5461 PetscFunctionBegin; 5462 /* Compute global number of coarse dofs */ 5463 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5464 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5465 } 5466 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5467 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5468 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5469 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5470 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5471 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5472 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5473 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5474 if (local_size != pcbddc->local_primal_size) { 5475 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5476 } 5477 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5478 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5479 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5480 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5481 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5482 5483 /* check numbering */ 5484 if (pcbddc->dbg_flag) { 5485 PetscScalar coarsesum,*array,*array2; 5486 PetscInt i; 5487 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5488 5489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5491 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5492 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5493 /* counter */ 5494 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5495 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5496 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5497 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5498 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5499 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5500 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5501 for (i=0;i<pcbddc->local_primal_size;i++) { 5502 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5503 } 5504 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5505 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5506 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5507 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5508 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5509 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5510 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5511 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5512 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5513 for (i=0;i<pcis->n;i++) { 5514 if (array[i] != 0.0 && array[i] != array2[i]) { 5515 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5516 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5517 set_error = PETSC_TRUE; 5518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5519 } 5520 } 5521 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5522 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5524 for (i=0;i<pcis->n;i++) { 5525 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5526 } 5527 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5528 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5529 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5530 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5531 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5532 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5533 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5534 PetscInt *gidxs; 5535 5536 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5537 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5539 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5541 for (i=0;i<pcbddc->local_primal_size;i++) { 5542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5543 } 5544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5545 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5546 } 5547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5548 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5549 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5550 } 5551 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5552 /* get back data */ 5553 *coarse_size_n = coarse_size; 5554 *local_primal_indices_n = local_primal_indices; 5555 PetscFunctionReturn(0); 5556 } 5557 5558 #undef __FUNCT__ 5559 #define __FUNCT__ "PCBDDCGlobalToLocal" 5560 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5561 { 5562 IS localis_t; 5563 PetscInt i,lsize,*idxs,n; 5564 PetscScalar *vals; 5565 PetscErrorCode ierr; 5566 5567 PetscFunctionBegin; 5568 /* get indices in local ordering exploiting local to global map */ 5569 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5570 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5571 for (i=0;i<lsize;i++) vals[i] = 1.0; 5572 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5573 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5574 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5575 if (idxs) { /* multilevel guard */ 5576 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5577 } 5578 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5579 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5580 ierr = PetscFree(vals);CHKERRQ(ierr); 5581 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5582 /* now compute set in local ordering */ 5583 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5584 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5585 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5586 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5587 for (i=0,lsize=0;i<n;i++) { 5588 if (PetscRealPart(vals[i]) > 0.5) { 5589 lsize++; 5590 } 5591 } 5592 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5593 for (i=0,lsize=0;i<n;i++) { 5594 if (PetscRealPart(vals[i]) > 0.5) { 5595 idxs[lsize++] = i; 5596 } 5597 } 5598 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5599 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5600 *localis = localis_t; 5601 PetscFunctionReturn(0); 5602 } 5603 5604 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5605 #undef __FUNCT__ 5606 #define __FUNCT__ "PCBDDCMatMult_Private" 5607 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5608 { 5609 PCBDDCChange_ctx change_ctx; 5610 PetscErrorCode ierr; 5611 5612 PetscFunctionBegin; 5613 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5614 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5615 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5616 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5617 PetscFunctionReturn(0); 5618 } 5619 5620 #undef __FUNCT__ 5621 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5622 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5623 { 5624 PCBDDCChange_ctx change_ctx; 5625 PetscErrorCode ierr; 5626 5627 PetscFunctionBegin; 5628 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5629 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5630 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5631 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5632 PetscFunctionReturn(0); 5633 } 5634 5635 #undef __FUNCT__ 5636 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5637 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5638 { 5639 PC_IS *pcis=(PC_IS*)pc->data; 5640 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5641 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5642 Mat S_j; 5643 PetscInt *used_xadj,*used_adjncy; 5644 PetscBool free_used_adj; 5645 PetscErrorCode ierr; 5646 5647 PetscFunctionBegin; 5648 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5649 free_used_adj = PETSC_FALSE; 5650 if (pcbddc->sub_schurs_layers == -1) { 5651 used_xadj = NULL; 5652 used_adjncy = NULL; 5653 } else { 5654 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5655 used_xadj = pcbddc->mat_graph->xadj; 5656 used_adjncy = pcbddc->mat_graph->adjncy; 5657 } else if (pcbddc->computed_rowadj) { 5658 used_xadj = pcbddc->mat_graph->xadj; 5659 used_adjncy = pcbddc->mat_graph->adjncy; 5660 } else { 5661 PetscBool flg_row=PETSC_FALSE; 5662 const PetscInt *xadj,*adjncy; 5663 PetscInt nvtxs; 5664 5665 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5666 if (flg_row) { 5667 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5668 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5669 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5670 free_used_adj = PETSC_TRUE; 5671 } else { 5672 pcbddc->sub_schurs_layers = -1; 5673 used_xadj = NULL; 5674 used_adjncy = NULL; 5675 } 5676 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5677 } 5678 } 5679 5680 /* setup sub_schurs data */ 5681 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5682 if (!sub_schurs->use_mumps) { 5683 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5684 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5685 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5686 } else { 5687 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5688 PetscBool isseqaij; 5689 if (!pcbddc->use_vertices && reuse_solvers) { 5690 PetscInt n_vertices; 5691 5692 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5693 reuse_solvers = (PetscBool)!n_vertices; 5694 } 5695 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5696 if (!isseqaij) { 5697 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5698 if (matis->A == pcbddc->local_mat) { 5699 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5700 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5701 } else { 5702 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5703 } 5704 } 5705 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5706 } 5707 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5708 5709 /* free adjacency */ 5710 if (free_used_adj) { 5711 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5712 } 5713 PetscFunctionReturn(0); 5714 } 5715 5716 #undef __FUNCT__ 5717 #define __FUNCT__ "PCBDDCInitSubSchurs" 5718 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5719 { 5720 PC_IS *pcis=(PC_IS*)pc->data; 5721 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5722 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5723 PCBDDCGraph graph; 5724 PetscErrorCode ierr; 5725 5726 PetscFunctionBegin; 5727 /* attach interface graph for determining subsets */ 5728 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5729 IS verticesIS,verticescomm; 5730 PetscInt vsize,*idxs; 5731 5732 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5733 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5734 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5735 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5736 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5737 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5738 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5739 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5740 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5741 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5742 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5743 /* 5744 if (pcbddc->dbg_flag) { 5745 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5746 } 5747 */ 5748 } else { 5749 graph = pcbddc->mat_graph; 5750 } 5751 5752 /* sub_schurs init */ 5753 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5754 5755 /* free graph struct */ 5756 if (pcbddc->sub_schurs_rebuild) { 5757 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5758 } 5759 PetscFunctionReturn(0); 5760 } 5761 5762 #undef __FUNCT__ 5763 #define __FUNCT__ "PCBDDCCheckOperator" 5764 PetscErrorCode PCBDDCCheckOperator(PC pc) 5765 { 5766 PC_IS *pcis=(PC_IS*)pc->data; 5767 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5768 PetscErrorCode ierr; 5769 5770 PetscFunctionBegin; 5771 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5772 IS zerodiag = NULL; 5773 Mat S_j,B0_B=NULL; 5774 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5775 PetscScalar *p0_check,*array,*array2; 5776 PetscReal norm; 5777 PetscInt i; 5778 5779 /* B0 and B0_B */ 5780 if (zerodiag) { 5781 IS dummy; 5782 5783 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5784 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5785 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5786 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5787 } 5788 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5789 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5790 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5791 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5792 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5793 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5794 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5795 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5796 /* S_j */ 5797 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5798 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5799 5800 /* mimic vector in \widetilde{W}_\Gamma */ 5801 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5802 /* continuous in primal space */ 5803 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5804 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5805 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5806 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5807 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5808 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5809 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5810 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5811 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5812 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5813 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5814 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5815 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5816 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5817 5818 /* assemble rhs for coarse problem */ 5819 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5820 /* local with Schur */ 5821 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5822 if (zerodiag) { 5823 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5824 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5825 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5826 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5827 } 5828 /* sum on primal nodes the local contributions */ 5829 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5830 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5831 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5832 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5833 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5834 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5835 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5836 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5837 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5838 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5839 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5840 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5841 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5842 /* scale primal nodes (BDDC sums contibutions) */ 5843 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5844 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5845 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5846 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5847 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5848 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5849 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5850 /* global: \widetilde{B0}_B w_\Gamma */ 5851 if (zerodiag) { 5852 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5853 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5854 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5855 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5856 } 5857 /* BDDC */ 5858 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5859 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5860 5861 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5862 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5863 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5864 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5865 for (i=0;i<pcbddc->benign_n;i++) { 5866 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5867 } 5868 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5869 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5870 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5871 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5872 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5873 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5874 } 5875 PetscFunctionReturn(0); 5876 } 5877