1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 /* partial clean up */ 147 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 148 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 149 if (A != B) { 150 ierr = MatDestroy(&B);CHKERRQ(ierr); 151 } 152 153 /* get back data */ 154 if (ncc) *ncc = graph->ncc; 155 if (cc) { 156 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 157 for (i=0;i<graph->ncc;i++) { 158 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 159 } 160 *cc = cc_n; 161 } 162 /* clean up graph */ 163 graph->xadj = 0; 164 graph->adjncy = 0; 165 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "PCBDDCBenignCheck" 171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 172 { 173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 174 PC_IS* pcis = (PC_IS*)(pc->data); 175 IS dirIS = NULL; 176 PetscInt i; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 181 if (zerodiag) { 182 Mat A; 183 Vec vec3_N; 184 PetscScalar *vals; 185 const PetscInt *idxs; 186 PetscInt nz; 187 188 /* p0 */ 189 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 190 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 191 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 193 for (i=0;i<nz;i++) vals[i] = 1.; 194 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 195 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 196 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 197 /* v_I */ 198 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 199 for (i=0;i<nz;i++) vals[i] = 0.; 200 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 202 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 203 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 204 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 205 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 206 if (dirIS) { 207 PetscInt n; 208 209 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 210 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 211 for (i=0;i<n;i++) vals[i] = 0.; 212 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 213 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 214 } 215 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 216 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 218 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 219 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 220 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 221 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 222 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 223 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 224 } 225 ierr = PetscFree(vals);CHKERRQ(ierr); 226 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 227 } 228 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 229 230 /* check PCBDDCBenignGetOrSetP0 */ 231 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 232 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 233 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 234 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 235 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 236 for (i=0;i<pcbddc->benign_n;i++) { 237 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 238 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 239 } 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 247 { 248 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 249 IS pressures,zerodiag,*zerodiag_subs; 250 PetscInt nz,n; 251 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 256 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 258 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 259 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 260 Checks if all the pressure dofs in each subdomain have a zero diagonal 261 If not, a change of basis on pressures is not needed 262 since the local Schur complements are SPD 263 */ 264 has_null_pressures = PETSC_TRUE; 265 have_null = PETSC_TRUE; 266 if (pcbddc->n_ISForDofsLocal) { 267 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 268 269 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 270 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 271 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 272 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 273 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 274 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 275 if (!sorted) { 276 ierr = ISSort(pressures);CHKERRQ(ierr); 277 } 278 } else { 279 pressures = NULL; 280 } 281 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 282 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 283 /* TODO: add check for shared dofs and raise error */ 284 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 285 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 286 if (!sorted) { 287 ierr = ISSort(zerodiag);CHKERRQ(ierr); 288 } 289 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 290 if (!nz) { 291 if (n) have_null = PETSC_FALSE; 292 has_null_pressures = PETSC_FALSE; 293 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 294 } 295 recompute_zerodiag = PETSC_FALSE; 296 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 297 zerodiag_subs = NULL; 298 pcbddc->benign_n = 0; 299 if (has_null_pressures) { 300 IS *subs; 301 PetscInt nsubs,i; 302 303 subs = pcbddc->local_subs; 304 nsubs = pcbddc->n_local_subs; 305 if (nsubs > 1) { 306 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 307 for (i=0;i<nsubs;i++) { 308 ISLocalToGlobalMapping l2g; 309 IS t_zerodiag_subs; 310 PetscInt nl; 311 312 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 313 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 314 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 315 if (nl) { 316 PetscBool valid = PETSC_TRUE; 317 318 if (pressures) { 319 IS t_pressure_subs; 320 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 321 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 322 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 323 } 324 if (valid) { 325 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 326 pcbddc->benign_n++; 327 } else { 328 recompute_zerodiag = PETSC_TRUE; 329 } 330 } 331 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 332 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 333 } 334 } else { /* there's just one subdomain (or zero if they have not been detected */ 335 PetscBool valid = PETSC_TRUE; 336 if (pressures) { 337 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 338 } 339 if (valid) { 340 pcbddc->benign_n = 1; 341 ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr); 342 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 343 zerodiag_subs[0] = zerodiag; 344 } 345 } 346 } 347 348 if (!pcbddc->benign_n) { 349 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 350 recompute_zerodiag = PETSC_FALSE; 351 has_null_pressures = PETSC_FALSE; 352 have_null = PETSC_FALSE; 353 } 354 355 /* final check for null pressures */ 356 if (zerodiag && pressures) { 357 PetscInt nz,np; 358 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 359 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 360 if (nz != np) have_null = PETSC_FALSE; 361 } 362 363 if (recompute_zerodiag) { 364 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 365 if (pcbddc->benign_n == 1) { 366 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 367 zerodiag = zerodiag_subs[0]; 368 } else { 369 PetscInt i,nzn,*new_idxs; 370 371 nzn = 0; 372 for (i=0;i<pcbddc->benign_n;i++) { 373 PetscInt ns; 374 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 375 nzn += ns; 376 } 377 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 378 nzn = 0; 379 for (i=0;i<pcbddc->benign_n;i++) { 380 PetscInt ns,*idxs; 381 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 382 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 383 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 384 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 nzn += ns; 386 } 387 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 388 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 389 } 390 have_null = PETSC_FALSE; 391 } 392 393 if (has_null_pressures) { 394 IS zerodiagc; 395 const PetscInt *idxs,*idxsc; 396 PetscInt i,s,*nnz; 397 398 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 399 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 400 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 401 /* local change of basis for pressures */ 402 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 403 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 404 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 405 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 406 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 407 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 408 for (i=0;i<pcbddc->benign_n;i++) { 409 PetscInt nzs,j; 410 411 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 413 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 414 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 415 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 } 417 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 418 ierr = PetscFree(nnz);CHKERRQ(ierr); 419 /* set identity on velocities */ 420 for (i=0;i<n-nz;i++) { 421 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 422 } 423 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 424 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 425 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 426 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 427 /* set change on pressures */ 428 for (s=0;s<pcbddc->benign_n;s++) { 429 PetscScalar *array; 430 PetscInt nzs; 431 432 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 433 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 434 for (i=0;i<nzs-1;i++) { 435 PetscScalar vals[2]; 436 PetscInt cols[2]; 437 438 cols[0] = idxs[i]; 439 cols[1] = idxs[nzs-1]; 440 vals[0] = 1.; 441 vals[1] = 1.; 442 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 443 } 444 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 445 for (i=0;i<nzs-1;i++) array[i] = -1.; 446 array[nzs-1] = 1.; 447 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 448 /* store local idxs for p0 */ 449 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 450 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 451 ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr); 452 ierr = PetscFree(array);CHKERRQ(ierr); 453 } 454 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 456 /* TODO: need optimization? */ 457 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 458 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 459 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 460 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 461 /* store global idxs for p0 */ 462 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 463 } 464 ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr); 465 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 466 467 /* determines if the coarse solver will be singular or not */ 468 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 469 *zerodiaglocal = zerodiag; 470 PetscFunctionReturn(0); 471 } 472 473 #undef __FUNCT__ 474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 476 { 477 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 478 PetscErrorCode ierr; 479 480 PetscFunctionBegin; 481 if (!pcbddc->benign_sf) { 482 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 483 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 484 } 485 if (get) { /* use SF to get values */ 486 PetscScalar *array; 487 488 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 489 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 490 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 492 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 493 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 494 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 495 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 503 { 504 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 /* TODO: add error checking 509 - avoid nested pop (or push) calls. 510 - cannot push before pop. 511 - cannot call this if pcbddc->local_mat is NULL 512 */ 513 if (!pcbddc->benign_n) { 514 PetscFunctionReturn(0); 515 } 516 if (pop) { 517 IS is_p0; 518 MatReuse reuse; 519 520 /* extract B_0 */ 521 reuse = MAT_INITIAL_MATRIX; 522 if (pcbddc->benign_B0) { 523 reuse = MAT_REUSE_MATRIX; 524 } 525 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 526 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 527 /* remove rows and cols from local problem */ 528 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 529 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 530 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 531 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 532 } else { /* push */ 533 PetscInt i; 534 535 for (i=0;i<pcbddc->benign_n;i++) { 536 PetscScalar *B0_vals; 537 PetscInt *B0_cols,B0_ncol; 538 539 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 540 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 541 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 542 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 543 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 544 } 545 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 } 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNCT__ 552 #define __FUNCT__ "PCBDDCAdaptiveSelection" 553 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 554 { 555 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 556 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 557 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 558 PetscBLASInt *B_iwork,*B_ifail; 559 PetscScalar *work,lwork; 560 PetscScalar *St,*S,*eigv; 561 PetscScalar *Sarray,*Starray; 562 PetscReal *eigs,thresh; 563 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 564 PetscBool allocated_S_St; 565 #if defined(PETSC_USE_COMPLEX) 566 PetscReal *rwork; 567 #endif 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 if (!sub_schurs->use_mumps) { 572 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 573 } 574 575 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 576 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 577 } 578 579 if (pcbddc->dbg_flag) { 580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 582 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 583 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 584 } 585 586 if (pcbddc->dbg_flag) { 587 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 588 } 589 590 /* max size of subsets */ 591 mss = 0; 592 for (i=0;i<sub_schurs->n_subs;i++) { 593 PetscInt subset_size; 594 595 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 596 mss = PetscMax(mss,subset_size); 597 } 598 599 /* min/max and threshold */ 600 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 601 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 602 nmax = PetscMax(nmin,nmax); 603 allocated_S_St = PETSC_FALSE; 604 if (nmin) { 605 allocated_S_St = PETSC_TRUE; 606 } 607 608 /* allocate lapack workspace */ 609 cum = cum2 = 0; 610 maxneigs = 0; 611 for (i=0;i<sub_schurs->n_subs;i++) { 612 PetscInt n,subset_size; 613 614 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 615 n = PetscMin(subset_size,nmax); 616 cum += subset_size; 617 cum2 += subset_size*n; 618 maxneigs = PetscMax(maxneigs,n); 619 } 620 if (mss) { 621 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 622 PetscBLASInt B_itype = 1; 623 PetscBLASInt B_N = mss; 624 PetscReal zero = 0.0; 625 PetscReal eps = 0.0; /* dlamch? */ 626 627 B_lwork = -1; 628 S = NULL; 629 St = NULL; 630 eigs = NULL; 631 eigv = NULL; 632 B_iwork = NULL; 633 B_ifail = NULL; 634 #if defined(PETSC_USE_COMPLEX) 635 rwork = NULL; 636 #endif 637 thresh = 1.0; 638 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 639 #if defined(PETSC_USE_COMPLEX) 640 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 641 #else 642 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 643 #endif 644 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 645 ierr = PetscFPTrapPop();CHKERRQ(ierr); 646 } else { 647 /* TODO */ 648 } 649 } else { 650 lwork = 0; 651 } 652 653 nv = 0; 654 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 655 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 656 } 657 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 658 if (allocated_S_St) { 659 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 660 } 661 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 662 #if defined(PETSC_USE_COMPLEX) 663 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 664 #endif 665 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 666 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 667 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 668 nv+cum,&pcbddc->adaptive_constraints_idxs, 669 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 670 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 671 672 maxneigs = 0; 673 cum = cum2 = cumarray = 0; 674 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 675 pcbddc->adaptive_constraints_data_ptr[0] = 0; 676 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 677 const PetscInt *idxs; 678 679 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 680 for (cum=0;cum<nv;cum++) { 681 pcbddc->adaptive_constraints_n[cum] = 1; 682 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 683 pcbddc->adaptive_constraints_data[cum] = 1.0; 684 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 685 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 686 } 687 cum2 = cum; 688 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 689 } 690 691 if (mss) { /* multilevel */ 692 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 693 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 694 } 695 696 thresh = pcbddc->adaptive_threshold; 697 for (i=0;i<sub_schurs->n_subs;i++) { 698 const PetscInt *idxs; 699 PetscReal infty = PETSC_MAX_REAL; 700 PetscInt j,subset_size,eigs_start = 0; 701 PetscBLASInt B_N; 702 PetscBool same_data = PETSC_FALSE; 703 704 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 705 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 706 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 707 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 708 if (sub_schurs->is_hermitian) { 709 PetscInt j,k; 710 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 711 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 712 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 713 } 714 for (j=0;j<subset_size;j++) { 715 for (k=j;k<subset_size;k++) { 716 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 717 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 718 } 719 } 720 } else { 721 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 722 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 723 } 724 } else { 725 S = Sarray + cumarray; 726 St = Starray + cumarray; 727 } 728 729 /* see if we can save some work */ 730 if (sub_schurs->n_subs == 1) { 731 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 732 } 733 734 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 735 B_neigs = 0; 736 } else { 737 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 738 PetscBLASInt B_itype = 1; 739 PetscBLASInt B_IL, B_IU; 740 PetscReal eps = -1.0; /* dlamch? */ 741 PetscInt nmin_s; 742 743 if (pcbddc->dbg_flag) { 744 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 745 } 746 747 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 748 if (thresh > 1.+PETSC_SMALL) { 749 750 /* ask for eigenvalues larger than thresh */ 751 #if defined(PETSC_USE_COMPLEX) 752 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 753 #else 754 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 755 #endif 756 } else { 757 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 758 B_IL = 1; 759 #if defined(PETSC_USE_COMPLEX) 760 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 761 #else 762 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 763 #endif 764 } 765 ierr = PetscFPTrapPop();CHKERRQ(ierr); 766 if (B_ierr) { 767 if (B_ierr < 0 ) { 768 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 769 } else if (B_ierr <= B_N) { 770 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 771 } else { 772 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 773 } 774 } 775 776 if (B_neigs > nmax) { 777 if (pcbddc->dbg_flag) { 778 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 779 } 780 eigs_start = B_neigs -nmax; 781 B_neigs = nmax; 782 } 783 784 nmin_s = PetscMin(nmin,B_N); 785 if (B_neigs < nmin_s) { 786 PetscBLASInt B_neigs2; 787 788 B_IU = B_N - B_neigs; 789 B_IL = B_N - nmin_s + 1; 790 if (pcbddc->dbg_flag) { 791 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 792 } 793 if (sub_schurs->is_hermitian) { 794 PetscInt j; 795 for (j=0;j<subset_size;j++) { 796 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 797 } 798 for (j=0;j<subset_size;j++) { 799 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 800 } 801 } else { 802 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 803 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 804 } 805 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 806 #if defined(PETSC_USE_COMPLEX) 807 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 808 #else 809 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 810 #endif 811 ierr = PetscFPTrapPop();CHKERRQ(ierr); 812 B_neigs += B_neigs2; 813 } 814 if (B_ierr) { 815 if (B_ierr < 0 ) { 816 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 817 } else if (B_ierr <= B_N) { 818 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 819 } else { 820 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 821 } 822 } 823 if (pcbddc->dbg_flag) { 824 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 825 for (j=0;j<B_neigs;j++) { 826 if (eigs[j] == 0.0) { 827 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 828 } else { 829 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 830 } 831 } 832 } 833 } else { 834 /* TODO */ 835 } 836 } 837 maxneigs = PetscMax(B_neigs,maxneigs); 838 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 839 if (B_neigs) { 840 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 841 842 if (pcbddc->dbg_flag > 1) { 843 PetscInt ii; 844 for (ii=0;ii<B_neigs;ii++) { 845 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 846 for (j=0;j<B_N;j++) { 847 #if defined(PETSC_USE_COMPLEX) 848 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 849 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 850 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 851 #else 852 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 853 #endif 854 } 855 } 856 } 857 #if 0 858 for (j=0;j<B_neigs;j++) { 859 PetscBLASInt Blas_N,Blas_one = 1.0; 860 PetscScalar norm; 861 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 862 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 863 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 864 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 865 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 866 } else { 867 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 868 } 869 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 870 } 871 #endif 872 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 873 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 874 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 875 cum++; 876 } 877 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 878 /* shift for next computation */ 879 cumarray += subset_size*subset_size; 880 } 881 if (pcbddc->dbg_flag) { 882 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 883 } 884 885 if (mss) { 886 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 887 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 888 /* destroy matrices (junk) */ 889 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 890 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 891 } 892 if (allocated_S_St) { 893 ierr = PetscFree2(S,St);CHKERRQ(ierr); 894 } 895 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 896 #if defined(PETSC_USE_COMPLEX) 897 ierr = PetscFree(rwork);CHKERRQ(ierr); 898 #endif 899 if (pcbddc->dbg_flag) { 900 PetscInt maxneigs_r; 901 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 902 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 903 } 904 PetscFunctionReturn(0); 905 } 906 907 #undef __FUNCT__ 908 #define __FUNCT__ "PCBDDCSetUpSolvers" 909 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 910 { 911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 912 PetscScalar *coarse_submat_vals; 913 PetscErrorCode ierr; 914 915 PetscFunctionBegin; 916 /* Setup local scatters R_to_B and (optionally) R_to_D */ 917 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 918 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 919 920 /* Setup local neumann solver ksp_R */ 921 /* PCBDDCSetUpLocalScatters should be called first! */ 922 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 923 924 /* Change global null space passed in by the user if change of basis has been requested */ 925 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 926 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 927 } 928 929 /* 930 Setup local correction and local part of coarse basis. 931 Gives back the dense local part of the coarse matrix in column major ordering 932 */ 933 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 934 935 /* Compute total number of coarse nodes and setup coarse solver */ 936 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 937 938 /* free */ 939 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 940 PetscFunctionReturn(0); 941 } 942 943 #undef __FUNCT__ 944 #define __FUNCT__ "PCBDDCResetCustomization" 945 PetscErrorCode PCBDDCResetCustomization(PC pc) 946 { 947 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 948 PetscErrorCode ierr; 949 950 PetscFunctionBegin; 951 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 952 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 953 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 954 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 955 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 956 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 957 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 958 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 959 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 960 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 961 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 962 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "PCBDDCResetTopography" 968 PetscErrorCode PCBDDCResetTopography(PC pc) 969 { 970 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 971 PetscInt i; 972 PetscErrorCode ierr; 973 974 PetscFunctionBegin; 975 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 976 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 977 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 978 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 979 for (i=0;i<pcbddc->n_local_subs;i++) { 980 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 981 } 982 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 983 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "PCBDDCResetSolvers" 989 PetscErrorCode PCBDDCResetSolvers(PC pc) 990 { 991 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 992 PetscScalar *array; 993 PetscErrorCode ierr; 994 995 PetscFunctionBegin; 996 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 997 if (pcbddc->coarse_phi_B) { 998 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 999 ierr = PetscFree(array);CHKERRQ(ierr); 1000 } 1001 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1002 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1003 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1004 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1005 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1006 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1007 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1008 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1009 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1010 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1011 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1012 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1013 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1014 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1015 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1016 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1017 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1018 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1019 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1020 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1021 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1022 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1023 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1024 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1025 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1026 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1027 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1028 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1029 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1030 PetscFunctionReturn(0); 1031 } 1032 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1035 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1036 { 1037 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1038 PC_IS *pcis = (PC_IS*)pc->data; 1039 VecType impVecType; 1040 PetscInt n_constraints,n_R,old_size; 1041 PetscErrorCode ierr; 1042 1043 PetscFunctionBegin; 1044 if (!pcbddc->ConstraintMatrix) { 1045 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1046 } 1047 /* get sizes */ 1048 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1049 n_R = pcis->n - pcbddc->n_vertices; 1050 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1051 /* local work vectors (try to avoid unneeded work)*/ 1052 /* R nodes */ 1053 old_size = -1; 1054 if (pcbddc->vec1_R) { 1055 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1056 } 1057 if (n_R != old_size) { 1058 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1059 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1060 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1061 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1062 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1063 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1064 } 1065 /* local primal dofs */ 1066 old_size = -1; 1067 if (pcbddc->vec1_P) { 1068 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1069 } 1070 if (pcbddc->local_primal_size != old_size) { 1071 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1072 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1073 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1074 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1075 } 1076 /* local explicit constraints */ 1077 old_size = -1; 1078 if (pcbddc->vec1_C) { 1079 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1080 } 1081 if (n_constraints && n_constraints != old_size) { 1082 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1083 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1084 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1085 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "PCBDDCSetUpCorrection" 1092 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1093 { 1094 PetscErrorCode ierr; 1095 /* pointers to pcis and pcbddc */ 1096 PC_IS* pcis = (PC_IS*)pc->data; 1097 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1098 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1099 /* submatrices of local problem */ 1100 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1101 /* submatrices of benign trick */ 1102 Mat B0_V = NULL; 1103 /* submatrices of local coarse problem */ 1104 Mat S_VV,S_CV,S_VC,S_CC; 1105 /* working matrices */ 1106 Mat C_CR; 1107 /* additional working stuff */ 1108 PC pc_R; 1109 Mat F; 1110 PetscBool isLU,isCHOL,isILU; 1111 1112 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1113 PetscScalar *work; 1114 PetscInt *idx_V_B; 1115 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1116 PetscInt i,n_R,n_D,n_B; 1117 1118 /* some shortcuts to scalars */ 1119 PetscScalar one=1.0,m_one=-1.0; 1120 1121 PetscFunctionBegin; 1122 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1123 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1124 } 1125 1126 /* Set Non-overlapping dimensions */ 1127 n_vertices = pcbddc->n_vertices; 1128 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1129 n_B = pcis->n_B; 1130 n_D = pcis->n - n_B; 1131 n_R = pcis->n - n_vertices; 1132 1133 /* vertices in boundary numbering */ 1134 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1135 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1136 if (i != n_vertices) { 1137 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1138 } 1139 1140 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1141 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1143 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1145 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1147 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1148 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1149 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1150 1151 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1152 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1153 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1154 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1155 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1156 lda_rhs = n_R; 1157 if (isLU || isILU || isCHOL) { 1158 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1159 } else if (sub_schurs->reuse_mumps) { 1160 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1161 MatFactorType type; 1162 1163 F = reuse_mumps->F; 1164 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1165 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1166 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1167 } else { 1168 F = NULL; 1169 } 1170 1171 /* allocate workspace */ 1172 n = 0; 1173 if (n_constraints) { 1174 n += lda_rhs*n_constraints; 1175 } 1176 if (n_vertices) { 1177 n = PetscMax(2*lda_rhs*n_vertices,n); 1178 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1179 } 1180 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1181 1182 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1183 if (n_constraints) { 1184 Mat M1,M2,M3; 1185 Mat auxmat; 1186 IS is_aux; 1187 PetscScalar *array,*array2; 1188 1189 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1190 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1191 1192 /* Extract constraints on R nodes: C_{CR} */ 1193 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1194 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1195 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1196 1197 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1198 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1199 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1200 for (i=0;i<n_constraints;i++) { 1201 const PetscScalar *row_cmat_values; 1202 const PetscInt *row_cmat_indices; 1203 PetscInt size_of_constraint,j; 1204 1205 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1206 for (j=0;j<size_of_constraint;j++) { 1207 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1208 } 1209 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1210 } 1211 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1212 if (F) { 1213 Mat B; 1214 1215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1216 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1217 ierr = MatDestroy(&B);CHKERRQ(ierr); 1218 } else { 1219 PetscScalar *marr; 1220 1221 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1222 for (i=0;i<n_constraints;i++) { 1223 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1224 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1225 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1226 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1227 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1228 } 1229 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1230 } 1231 if (!pcbddc->switch_static) { 1232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1233 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1234 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1235 for (i=0;i<n_constraints;i++) { 1236 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1237 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1238 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1239 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1241 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1242 } 1243 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1244 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1245 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1246 } else { 1247 if (lda_rhs != n_R) { 1248 IS dummy; 1249 1250 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1251 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1252 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1253 } else { 1254 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1255 pcbddc->local_auxmat2 = local_auxmat2_R; 1256 } 1257 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1258 } 1259 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1260 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1261 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1262 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1263 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1264 if (isCHOL) { 1265 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1266 } else { 1267 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1268 } 1269 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1270 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1271 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1272 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1273 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1274 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1275 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1276 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1277 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1278 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1279 } 1280 /* Get submatrices from subdomain matrix */ 1281 if (pcbddc->benign_n) { 1282 IS dummy; 1283 Mat B0_R; 1284 PetscReal norm; 1285 1286 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1287 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1288 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1289 if (norm > PETSC_SMALL) { 1290 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1291 } 1292 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1293 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1294 } 1295 1296 if (n_vertices) { 1297 IS is_aux; 1298 1299 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1300 IS tis; 1301 1302 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1303 ierr = ISSort(tis);CHKERRQ(ierr); 1304 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1305 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1306 } else { 1307 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1308 } 1309 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1310 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1311 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1312 if (pcbddc->benign_n) { 1313 IS dummy; 1314 1315 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1316 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1317 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1318 } 1319 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1320 } 1321 1322 /* Matrix of coarse basis functions (local) */ 1323 if (pcbddc->coarse_phi_B) { 1324 PetscInt on_B,on_primal,on_D=n_D; 1325 if (pcbddc->coarse_phi_D) { 1326 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1329 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1330 PetscScalar *marray; 1331 1332 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1333 ierr = PetscFree(marray);CHKERRQ(ierr); 1334 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1335 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1336 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1337 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1338 } 1339 } 1340 1341 if (!pcbddc->coarse_phi_B) { 1342 PetscScalar *marray; 1343 1344 n = n_B*pcbddc->local_primal_size; 1345 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1346 n += n_D*pcbddc->local_primal_size; 1347 } 1348 if (!pcbddc->symmetric_primal) { 1349 n *= 2; 1350 } 1351 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1352 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1353 n = n_B*pcbddc->local_primal_size; 1354 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1355 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1356 n += n_D*pcbddc->local_primal_size; 1357 } 1358 if (!pcbddc->symmetric_primal) { 1359 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1360 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1361 n = n_B*pcbddc->local_primal_size; 1362 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1363 } 1364 } else { 1365 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1366 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1367 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1368 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1369 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1370 } 1371 } 1372 } 1373 1374 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1375 p0_lidx_I = NULL; 1376 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1377 const PetscInt *idxs; 1378 1379 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1380 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1381 for (i=0;i<pcbddc->benign_n;i++) { 1382 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1383 } 1384 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1385 } 1386 1387 /* vertices */ 1388 if (n_vertices) { 1389 1390 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1391 1392 if (n_R) { 1393 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1394 PetscBLASInt B_N,B_one = 1; 1395 PetscScalar *x,*y; 1396 PetscBool isseqaij; 1397 1398 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1399 if (lda_rhs == n_R) { 1400 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1401 } else { 1402 Mat A_RVt; 1403 PetscScalar *array; 1404 1405 /* TODO optimize */ 1406 ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr); 1407 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1408 ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1409 ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr); 1410 for (i=0;i<n_vertices;i++) { 1411 ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr); 1412 } 1413 ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr); 1414 ierr = MatDestroy(&A_RVt);CHKERRQ(ierr); 1415 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr); 1416 } 1417 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1418 if (F) { /* TODO could be optimized for symmetric problems */ 1419 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1420 } else { 1421 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1422 for (i=0;i<n_vertices;i++) { 1423 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1424 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1425 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1427 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1428 } 1429 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1430 } 1431 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1432 /* S_VV and S_CV */ 1433 if (n_constraints) { 1434 Mat B; 1435 1436 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1437 for (i=0;i<n_vertices;i++) { 1438 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1439 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1440 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1443 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1444 } 1445 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1446 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1447 ierr = MatDestroy(&B);CHKERRQ(ierr); 1448 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1449 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1450 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1451 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1452 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1453 ierr = MatDestroy(&B);CHKERRQ(ierr); 1454 } 1455 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1456 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1457 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1458 } 1459 if (lda_rhs != n_R) { 1460 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1461 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1462 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1463 } 1464 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1465 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1466 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1467 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1468 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1469 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1470 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1471 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1472 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1473 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1474 } else { 1475 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1476 } 1477 for (i=0;i<pcbddc->benign_n;i++) { 1478 const PetscScalar *vals; 1479 const PetscInt *idxs; 1480 PetscInt n,j,primal_idx; 1481 1482 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1483 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1484 for (j=0;j<n;j++) { 1485 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1486 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1487 } 1488 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1489 } 1490 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1491 1492 /* coarse basis functions */ 1493 for (i=0;i<n_vertices;i++) { 1494 PetscScalar *y; 1495 1496 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1497 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1498 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1499 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 y[n_B*i+idx_V_B[i]] = 1.0; 1502 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1503 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1504 1505 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1506 PetscInt j; 1507 1508 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1509 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1510 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1511 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1513 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1514 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1515 } 1516 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1517 } 1518 /* if n_R == 0 the object is not destroyed */ 1519 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1520 } 1521 1522 if (n_constraints) { 1523 Mat B; 1524 1525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1526 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1527 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1528 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1529 if (n_vertices) { 1530 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1531 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1532 } else { 1533 Mat S_VCt; 1534 1535 if (lda_rhs != n_R) { 1536 ierr = MatDestroy(&B);CHKERRQ(ierr); 1537 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1538 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1539 } 1540 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1541 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1542 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1543 } 1544 } 1545 ierr = MatDestroy(&B);CHKERRQ(ierr); 1546 /* coarse basis functions */ 1547 for (i=0;i<n_constraints;i++) { 1548 PetscScalar *y; 1549 1550 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1551 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1552 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1553 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1554 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1555 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1556 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1557 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1558 PetscInt j; 1559 1560 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1561 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1562 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1565 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1566 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1567 } 1568 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1569 } 1570 } 1571 if (n_constraints) { 1572 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1573 } 1574 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1575 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1576 1577 /* compute other basis functions for non-symmetric problems */ 1578 if (!pcbddc->symmetric_primal) { 1579 Mat B_V=NULL,B_C=NULL; 1580 PetscScalar *marray; 1581 1582 if (n_constraints) { 1583 Mat S_CCT,C_CRT; 1584 1585 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1586 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1587 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1588 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1589 if (n_vertices) { 1590 Mat S_VCT; 1591 1592 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1593 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1594 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1595 } 1596 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 1597 } 1598 if (n_vertices && n_R) { 1599 PetscScalar *av,*marray; 1600 const PetscInt *xadj,*adjncy; 1601 PetscInt n; 1602 PetscBool flg_row; 1603 1604 /* B_V = B_V - A_VR^T */ 1605 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1606 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1607 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 1608 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1609 for (i=0;i<n;i++) { 1610 PetscInt j; 1611 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 1612 } 1613 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1614 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1615 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1616 } 1617 1618 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1619 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1620 for (i=0;i<n_vertices;i++) { 1621 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 1622 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1623 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1624 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1625 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1626 } 1627 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1628 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 1629 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 1630 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 1631 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1632 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1633 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1634 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1635 } 1636 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 1637 /* coarse basis functions */ 1638 for (i=0;i<pcbddc->local_primal_size;i++) { 1639 PetscScalar *y; 1640 1641 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1642 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1643 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1644 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 if (i<n_vertices) { 1647 y[n_B*i+idx_V_B[i]] = 1.0; 1648 } 1649 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1650 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1651 1652 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1653 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1654 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1655 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1656 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1658 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1659 } 1660 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1661 } 1662 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1663 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1664 } 1665 /* free memory */ 1666 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1667 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1668 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1669 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1670 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1671 ierr = PetscFree(work);CHKERRQ(ierr); 1672 if (n_vertices) { 1673 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1674 } 1675 if (n_constraints) { 1676 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1677 } 1678 /* Checking coarse_sub_mat and coarse basis functios */ 1679 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1680 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1681 if (pcbddc->dbg_flag) { 1682 Mat coarse_sub_mat; 1683 Mat AUXMAT,TM1,TM2,TM3,TM4; 1684 Mat coarse_phi_D,coarse_phi_B; 1685 Mat coarse_psi_D,coarse_psi_B; 1686 Mat A_II,A_BB,A_IB,A_BI; 1687 Mat C_B,CPHI; 1688 IS is_dummy; 1689 Vec mones; 1690 MatType checkmattype=MATSEQAIJ; 1691 PetscReal real_value; 1692 1693 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1694 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1695 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1696 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1697 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1698 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1699 if (!pcbddc->symmetric_primal) { 1700 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1701 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1702 } 1703 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1704 1705 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1706 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1707 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1708 if (!pcbddc->symmetric_primal) { 1709 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1710 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1711 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1712 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1713 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1714 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1715 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1716 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1717 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1718 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1719 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1720 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1721 } else { 1722 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1723 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1724 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1725 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1726 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1727 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1728 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1729 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1730 } 1731 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1732 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1733 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1734 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1735 if (pcbddc->benign_n) { 1736 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1737 PetscScalar *data,*data2; 1738 PetscInt j; 1739 1740 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1741 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1742 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1743 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1744 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1745 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1746 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1747 for (j=0;j<pcbddc->benign_n;j++) { 1748 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1749 for (i=0;i<pcbddc->local_primal_size;i++) { 1750 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1751 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1752 } 1753 } 1754 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1755 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1756 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1757 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1758 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1759 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1760 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1761 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1762 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1763 } 1764 #if 0 1765 { 1766 PetscViewer viewer; 1767 char filename[256]; 1768 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 1769 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1770 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1771 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 1772 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 1773 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 1774 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1775 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 1776 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 1777 if (pcbddc->coarse_phi_D) { 1778 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 1779 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 1780 } 1781 if (pcbddc->coarse_psi_B) { 1782 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 1783 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 1784 } 1785 if (pcbddc->coarse_phi_B) { 1786 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 1787 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 1788 } 1789 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1790 } 1791 #endif 1792 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1793 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1794 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1795 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1796 1797 /* check constraints */ 1798 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1799 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1800 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1801 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1802 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1803 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1804 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1805 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1806 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1807 if (!pcbddc->symmetric_primal) { 1808 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1809 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1810 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1811 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1812 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1813 } 1814 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1815 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1816 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1817 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1818 } 1819 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1820 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1821 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1822 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1823 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1824 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1825 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1826 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1827 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1828 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1829 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1830 if (!pcbddc->symmetric_primal) { 1831 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1832 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1833 } 1834 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1835 } 1836 /* get back data */ 1837 *coarse_submat_vals_n = coarse_submat_vals; 1838 PetscFunctionReturn(0); 1839 } 1840 1841 #undef __FUNCT__ 1842 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1843 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1844 { 1845 Mat *work_mat; 1846 IS isrow_s,iscol_s; 1847 PetscBool rsorted,csorted; 1848 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1849 PetscErrorCode ierr; 1850 1851 PetscFunctionBegin; 1852 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1853 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1854 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1855 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1856 1857 if (!rsorted) { 1858 const PetscInt *idxs; 1859 PetscInt *idxs_sorted,i; 1860 1861 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1862 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1863 for (i=0;i<rsize;i++) { 1864 idxs_perm_r[i] = i; 1865 } 1866 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1867 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1868 for (i=0;i<rsize;i++) { 1869 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1870 } 1871 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1872 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1873 } else { 1874 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1875 isrow_s = isrow; 1876 } 1877 1878 if (!csorted) { 1879 if (isrow == iscol) { 1880 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1881 iscol_s = isrow_s; 1882 } else { 1883 const PetscInt *idxs; 1884 PetscInt *idxs_sorted,i; 1885 1886 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1887 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1888 for (i=0;i<csize;i++) { 1889 idxs_perm_c[i] = i; 1890 } 1891 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1892 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1893 for (i=0;i<csize;i++) { 1894 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1895 } 1896 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1897 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1898 } 1899 } else { 1900 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1901 iscol_s = iscol; 1902 } 1903 1904 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1905 1906 if (!rsorted || !csorted) { 1907 Mat new_mat; 1908 IS is_perm_r,is_perm_c; 1909 1910 if (!rsorted) { 1911 PetscInt *idxs_r,i; 1912 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1913 for (i=0;i<rsize;i++) { 1914 idxs_r[idxs_perm_r[i]] = i; 1915 } 1916 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1917 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1918 } else { 1919 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1920 } 1921 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1922 1923 if (!csorted) { 1924 if (isrow_s == iscol_s) { 1925 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1926 is_perm_c = is_perm_r; 1927 } else { 1928 PetscInt *idxs_c,i; 1929 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1930 for (i=0;i<csize;i++) { 1931 idxs_c[idxs_perm_c[i]] = i; 1932 } 1933 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1934 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1935 } 1936 } else { 1937 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1938 } 1939 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1940 1941 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1942 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1943 work_mat[0] = new_mat; 1944 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1945 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1946 } 1947 1948 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1949 *B = work_mat[0]; 1950 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1951 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1952 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1958 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1959 { 1960 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1961 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1962 Mat new_mat; 1963 IS is_local,is_global; 1964 PetscInt local_size; 1965 PetscBool isseqaij; 1966 PetscErrorCode ierr; 1967 1968 PetscFunctionBegin; 1969 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1970 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1971 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1972 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1973 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1974 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1975 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1976 1977 /* check */ 1978 if (pcbddc->dbg_flag) { 1979 Vec x,x_change; 1980 PetscReal error; 1981 1982 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1983 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1984 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1985 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1986 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1987 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1988 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1989 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1990 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 1991 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 1992 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 1994 ierr = VecDestroy(&x);CHKERRQ(ierr); 1995 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 1996 } 1997 1998 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 1999 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2000 if (isseqaij) { 2001 Mat M; 2002 2003 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2004 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2005 ierr = MatDestroy(&M);CHKERRQ(ierr); 2006 } else { 2007 Mat work_mat,M; 2008 2009 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2010 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2011 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2012 ierr = MatDestroy(&M);CHKERRQ(ierr); 2013 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2014 } 2015 if (matis->A->symmetric_set) { 2016 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2017 #if !defined(PETSC_USE_COMPLEX) 2018 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2019 #endif 2020 } 2021 /* 2022 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2023 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2024 */ 2025 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2026 PetscFunctionReturn(0); 2027 } 2028 2029 #undef __FUNCT__ 2030 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2031 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2032 { 2033 PC_IS* pcis = (PC_IS*)(pc->data); 2034 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2035 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2036 PetscInt *idx_R_local=NULL; 2037 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2038 PetscInt vbs,bs; 2039 PetscBT bitmask=NULL; 2040 PetscErrorCode ierr; 2041 2042 PetscFunctionBegin; 2043 /* 2044 No need to setup local scatters if 2045 - primal space is unchanged 2046 AND 2047 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2048 AND 2049 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2050 */ 2051 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2052 PetscFunctionReturn(0); 2053 } 2054 /* destroy old objects */ 2055 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2056 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2057 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2058 /* Set Non-overlapping dimensions */ 2059 n_B = pcis->n_B; 2060 n_D = pcis->n - n_B; 2061 n_vertices = pcbddc->n_vertices; 2062 2063 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2064 2065 /* create auxiliary bitmask and allocate workspace */ 2066 if (!sub_schurs->reuse_mumps) { 2067 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2068 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2069 for (i=0;i<n_vertices;i++) { 2070 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2071 } 2072 2073 for (i=0, n_R=0; i<pcis->n; i++) { 2074 if (!PetscBTLookup(bitmask,i)) { 2075 idx_R_local[n_R++] = i; 2076 } 2077 } 2078 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2079 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2080 2081 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2082 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2083 } 2084 2085 /* Block code */ 2086 vbs = 1; 2087 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2088 if (bs>1 && !(n_vertices%bs)) { 2089 PetscBool is_blocked = PETSC_TRUE; 2090 PetscInt *vary; 2091 if (!sub_schurs->reuse_mumps) { 2092 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2093 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2094 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2095 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2096 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2097 for (i=0; i<pcis->n/bs; i++) { 2098 if (vary[i]!=0 && vary[i]!=bs) { 2099 is_blocked = PETSC_FALSE; 2100 break; 2101 } 2102 } 2103 ierr = PetscFree(vary);CHKERRQ(ierr); 2104 } else { 2105 /* Verify directly the R set */ 2106 for (i=0; i<n_R/bs; i++) { 2107 PetscInt j,node=idx_R_local[bs*i]; 2108 for (j=1; j<bs; j++) { 2109 if (node != idx_R_local[bs*i+j]-j) { 2110 is_blocked = PETSC_FALSE; 2111 break; 2112 } 2113 } 2114 } 2115 } 2116 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2117 vbs = bs; 2118 for (i=0;i<n_R/vbs;i++) { 2119 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2120 } 2121 } 2122 } 2123 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2124 if (sub_schurs->reuse_mumps) { 2125 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2126 2127 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2128 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2129 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2130 reuse_mumps->is_R = pcbddc->is_R_local; 2131 } else { 2132 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2133 } 2134 2135 /* print some info if requested */ 2136 if (pcbddc->dbg_flag) { 2137 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2138 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2139 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2140 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2141 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2142 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2143 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2144 } 2145 2146 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2147 if (!sub_schurs->reuse_mumps) { 2148 IS is_aux1,is_aux2; 2149 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2150 2151 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2152 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2153 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2154 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2155 for (i=0; i<n_D; i++) { 2156 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2157 } 2158 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2159 for (i=0, j=0; i<n_R; i++) { 2160 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2161 aux_array1[j++] = i; 2162 } 2163 } 2164 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2165 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2166 for (i=0, j=0; i<n_B; i++) { 2167 if (!PetscBTLookup(bitmask,is_indices[i])) { 2168 aux_array2[j++] = i; 2169 } 2170 } 2171 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2172 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2173 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2174 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2175 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2176 2177 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2178 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2179 for (i=0, j=0; i<n_R; i++) { 2180 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2181 aux_array1[j++] = i; 2182 } 2183 } 2184 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2185 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2186 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2187 } 2188 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2189 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2190 } else { 2191 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2192 IS tis; 2193 PetscInt schur_size; 2194 2195 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2196 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2197 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2198 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2199 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2200 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2201 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2202 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2203 } 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2211 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2212 { 2213 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2214 PC_IS *pcis = (PC_IS*)pc->data; 2215 PC pc_temp; 2216 Mat A_RR; 2217 MatReuse reuse; 2218 PetscScalar m_one = -1.0; 2219 PetscReal value; 2220 PetscInt n_D,n_R; 2221 PetscBool use_exact,use_exact_reduced,issbaij; 2222 PetscErrorCode ierr; 2223 /* prefixes stuff */ 2224 char dir_prefix[256],neu_prefix[256],str_level[16]; 2225 size_t len; 2226 2227 PetscFunctionBegin; 2228 2229 /* compute prefixes */ 2230 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2231 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2232 if (!pcbddc->current_level) { 2233 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2234 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2235 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2236 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2237 } else { 2238 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2239 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2240 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2241 len -= 15; /* remove "pc_bddc_coarse_" */ 2242 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2243 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2244 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2245 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2246 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2247 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2248 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2249 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2250 } 2251 2252 /* DIRICHLET PROBLEM */ 2253 if (dirichlet) { 2254 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2255 if (pcbddc->local_mat->symmetric_set) { 2256 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2257 } 2258 /* Matrix for Dirichlet problem is pcis->A_II */ 2259 n_D = pcis->n - pcis->n_B; 2260 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2261 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2262 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2263 /* default */ 2264 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2265 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2266 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2267 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2268 if (issbaij) { 2269 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2270 } else { 2271 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2272 } 2273 /* Allow user's customization */ 2274 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2275 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2276 } 2277 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2278 if (sub_schurs->reuse_mumps) { 2279 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2280 2281 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2282 } 2283 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2284 if (!n_D) { 2285 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2286 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2287 } 2288 /* Set Up KSP for Dirichlet problem of BDDC */ 2289 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2290 /* set ksp_D into pcis data */ 2291 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2292 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2293 pcis->ksp_D = pcbddc->ksp_D; 2294 } 2295 2296 /* NEUMANN PROBLEM */ 2297 A_RR = 0; 2298 if (neumann) { 2299 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2300 PetscInt ibs,mbs; 2301 PetscBool issbaij; 2302 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2303 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2304 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2305 if (pcbddc->ksp_R) { /* already created ksp */ 2306 PetscInt nn_R; 2307 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2308 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2309 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2310 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2311 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2312 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2313 reuse = MAT_INITIAL_MATRIX; 2314 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2315 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2316 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2317 reuse = MAT_INITIAL_MATRIX; 2318 } else { /* safe to reuse the matrix */ 2319 reuse = MAT_REUSE_MATRIX; 2320 } 2321 } 2322 /* last check */ 2323 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2324 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2325 reuse = MAT_INITIAL_MATRIX; 2326 } 2327 } else { /* first time, so we need to create the matrix */ 2328 reuse = MAT_INITIAL_MATRIX; 2329 } 2330 /* extract A_RR */ 2331 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2332 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2333 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2334 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2335 if (matis->A == pcbddc->local_mat) { 2336 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2337 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2338 } else { 2339 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2340 } 2341 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2342 if (matis->A == pcbddc->local_mat) { 2343 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2344 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2345 } else { 2346 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2347 } 2348 } 2349 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2350 if (pcbddc->local_mat->symmetric_set) { 2351 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2352 } 2353 if (!pcbddc->ksp_R) { /* create object if not present */ 2354 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2355 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2356 /* default */ 2357 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2358 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2359 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2360 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2361 if (issbaij) { 2362 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2363 } else { 2364 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2365 } 2366 /* Allow user's customization */ 2367 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2368 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2369 } 2370 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2371 if (!n_R) { 2372 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2373 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2374 } 2375 /* Reuse MUMPS solver if it is present */ 2376 if (sub_schurs->reuse_mumps) { 2377 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2378 2379 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2380 } 2381 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2382 /* Set Up KSP for Neumann problem of BDDC */ 2383 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2384 } 2385 /* free Neumann problem's matrix */ 2386 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2387 2388 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2389 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2390 if (pcbddc->dbg_flag) { 2391 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2392 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2393 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2394 } 2395 if (dirichlet) { /* Dirichlet */ 2396 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2397 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2398 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2399 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2400 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2401 /* need to be adapted? */ 2402 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2403 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2404 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2405 /* print info */ 2406 if (pcbddc->dbg_flag) { 2407 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2409 } 2410 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2411 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2412 } 2413 } 2414 if (neumann) { /* Neumann */ 2415 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2416 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2417 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2418 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2419 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2420 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2421 /* need to be adapted? */ 2422 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2423 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2424 /* print info */ 2425 if (pcbddc->dbg_flag) { 2426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2427 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2428 } 2429 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2430 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2431 } 2432 } 2433 } 2434 PetscFunctionReturn(0); 2435 } 2436 2437 #undef __FUNCT__ 2438 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2439 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2440 { 2441 PetscErrorCode ierr; 2442 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2443 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2444 2445 PetscFunctionBegin; 2446 if (!sub_schurs->reuse_mumps) { 2447 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2448 } 2449 if (!pcbddc->switch_static) { 2450 if (applytranspose && pcbddc->local_auxmat1) { 2451 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2452 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2453 } 2454 if (!sub_schurs->reuse_mumps) { 2455 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2456 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2457 } else { 2458 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2459 2460 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2461 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2462 } 2463 } else { 2464 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2465 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2466 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2467 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2468 if (applytranspose && pcbddc->local_auxmat1) { 2469 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2470 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2471 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2472 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2473 } 2474 } 2475 if (!sub_schurs->reuse_mumps) { 2476 if (applytranspose) { 2477 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2478 } else { 2479 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2480 } 2481 #if defined(PETSC_HAVE_MUMPS) 2482 } else { 2483 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2484 2485 if (applytranspose) { 2486 ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2487 } else { 2488 ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2489 } 2490 #endif 2491 } 2492 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2493 if (!pcbddc->switch_static) { 2494 if (!sub_schurs->reuse_mumps) { 2495 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2496 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2497 } else { 2498 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2499 2500 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2501 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2502 } 2503 if (!applytranspose && pcbddc->local_auxmat1) { 2504 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2505 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2506 } 2507 } else { 2508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2510 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2511 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2512 if (!applytranspose && pcbddc->local_auxmat1) { 2513 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2514 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2515 } 2516 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2519 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2520 } 2521 PetscFunctionReturn(0); 2522 } 2523 2524 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2527 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2528 { 2529 PetscErrorCode ierr; 2530 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2531 PC_IS* pcis = (PC_IS*) (pc->data); 2532 const PetscScalar zero = 0.0; 2533 2534 PetscFunctionBegin; 2535 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2536 if (applytranspose) { 2537 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2538 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2539 } else { 2540 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2541 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2542 } 2543 2544 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2545 if (pcbddc->benign_n) { 2546 PetscScalar *array; 2547 PetscInt j; 2548 2549 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2550 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2551 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2552 } 2553 2554 /* start communications from local primal nodes to rhs of coarse solver */ 2555 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2556 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2557 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2558 2559 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2560 /* TODO remove null space when doing multilevel */ 2561 if (pcbddc->coarse_ksp) { 2562 Mat coarse_mat; 2563 Vec rhs,sol; 2564 MatNullSpace nullsp; 2565 2566 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2567 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2568 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2569 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2570 if (nullsp) { 2571 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2572 } 2573 if (applytranspose) { 2574 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2575 } else { 2576 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2577 } 2578 if (nullsp) { 2579 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2580 } 2581 } 2582 2583 /* Local solution on R nodes */ 2584 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2585 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2586 } 2587 2588 /* communications from coarse sol to local primal nodes */ 2589 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2590 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2591 2592 /* Sum contributions from two levels */ 2593 if (applytranspose) { 2594 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2595 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2596 } else { 2597 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2598 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2599 } 2600 /* store p0 */ 2601 if (pcbddc->benign_n) { 2602 PetscScalar *array; 2603 PetscInt j; 2604 2605 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2606 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2607 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2608 } 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2614 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2615 { 2616 PetscErrorCode ierr; 2617 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2618 PetscScalar *array; 2619 Vec from,to; 2620 2621 PetscFunctionBegin; 2622 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2623 from = pcbddc->coarse_vec; 2624 to = pcbddc->vec1_P; 2625 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2626 Vec tvec; 2627 2628 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2629 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2630 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2631 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2632 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2633 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2634 } 2635 } else { /* from local to global -> put data in coarse right hand side */ 2636 from = pcbddc->vec1_P; 2637 to = pcbddc->coarse_vec; 2638 } 2639 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2640 PetscFunctionReturn(0); 2641 } 2642 2643 #undef __FUNCT__ 2644 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2645 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2646 { 2647 PetscErrorCode ierr; 2648 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2649 PetscScalar *array; 2650 Vec from,to; 2651 2652 PetscFunctionBegin; 2653 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2654 from = pcbddc->coarse_vec; 2655 to = pcbddc->vec1_P; 2656 } else { /* from local to global -> put data in coarse right hand side */ 2657 from = pcbddc->vec1_P; 2658 to = pcbddc->coarse_vec; 2659 } 2660 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2661 if (smode == SCATTER_FORWARD) { 2662 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2663 Vec tvec; 2664 2665 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2666 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2667 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2668 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2669 } 2670 } else { 2671 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2672 ierr = VecResetArray(from);CHKERRQ(ierr); 2673 } 2674 } 2675 PetscFunctionReturn(0); 2676 } 2677 2678 /* uncomment for testing purposes */ 2679 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2680 #undef __FUNCT__ 2681 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2682 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2683 { 2684 PetscErrorCode ierr; 2685 PC_IS* pcis = (PC_IS*)(pc->data); 2686 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2687 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2688 /* one and zero */ 2689 PetscScalar one=1.0,zero=0.0; 2690 /* space to store constraints and their local indices */ 2691 PetscScalar *constraints_data; 2692 PetscInt *constraints_idxs,*constraints_idxs_B; 2693 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2694 PetscInt *constraints_n; 2695 /* iterators */ 2696 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2697 /* BLAS integers */ 2698 PetscBLASInt lwork,lierr; 2699 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2700 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2701 /* reuse */ 2702 PetscInt olocal_primal_size,olocal_primal_size_cc; 2703 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2704 /* change of basis */ 2705 PetscBool qr_needed; 2706 PetscBT change_basis,qr_needed_idx; 2707 /* auxiliary stuff */ 2708 PetscInt *nnz,*is_indices; 2709 PetscInt ncc; 2710 /* some quantities */ 2711 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2712 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2713 2714 PetscFunctionBegin; 2715 /* Destroy Mat objects computed previously */ 2716 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2717 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2718 /* save info on constraints from previous setup (if any) */ 2719 olocal_primal_size = pcbddc->local_primal_size; 2720 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2721 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2722 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2723 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2724 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2725 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2726 2727 /* print some info */ 2728 if (pcbddc->dbg_flag) { 2729 IS vertices; 2730 PetscInt nv,nedges,nfaces; 2731 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2732 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2733 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2734 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 2735 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2736 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2737 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2738 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2739 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2740 } 2741 2742 if (!pcbddc->adaptive_selection) { 2743 IS ISForVertices,*ISForFaces,*ISForEdges; 2744 MatNullSpace nearnullsp; 2745 const Vec *nearnullvecs; 2746 Vec *localnearnullsp; 2747 PetscScalar *array; 2748 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2749 PetscBool nnsp_has_cnst; 2750 /* LAPACK working arrays for SVD or POD */ 2751 PetscBool skip_lapack,boolforchange; 2752 PetscScalar *work; 2753 PetscReal *singular_vals; 2754 #if defined(PETSC_USE_COMPLEX) 2755 PetscReal *rwork; 2756 #endif 2757 #if defined(PETSC_MISSING_LAPACK_GESVD) 2758 PetscScalar *temp_basis,*correlation_mat; 2759 #else 2760 PetscBLASInt dummy_int=1; 2761 PetscScalar dummy_scalar=1.; 2762 #endif 2763 2764 /* Get index sets for faces, edges and vertices from graph */ 2765 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2766 /* free unneeded index sets */ 2767 if (!pcbddc->use_vertices) { 2768 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2769 } 2770 if (!pcbddc->use_edges) { 2771 for (i=0;i<n_ISForEdges;i++) { 2772 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2773 } 2774 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2775 n_ISForEdges = 0; 2776 } 2777 if (!pcbddc->use_faces) { 2778 for (i=0;i<n_ISForFaces;i++) { 2779 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2780 } 2781 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2782 n_ISForFaces = 0; 2783 } 2784 2785 #if defined(PETSC_USE_DEBUG) 2786 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2787 Also use_change_of_basis should be consistent among processors */ 2788 if (pcbddc->NullSpace) { 2789 PetscBool tbool[2],gbool[2]; 2790 2791 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2792 pcbddc->use_change_of_basis = PETSC_TRUE; 2793 if (!ISForEdges) { 2794 pcbddc->use_change_on_faces = PETSC_TRUE; 2795 } 2796 } 2797 tbool[0] = pcbddc->use_change_of_basis; 2798 tbool[1] = pcbddc->use_change_on_faces; 2799 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2800 pcbddc->use_change_of_basis = gbool[0]; 2801 pcbddc->use_change_on_faces = gbool[1]; 2802 } 2803 #endif 2804 2805 /* check if near null space is attached to global mat */ 2806 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2807 if (nearnullsp) { 2808 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2809 /* remove any stored info */ 2810 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2811 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2812 /* store information for BDDC solver reuse */ 2813 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2814 pcbddc->onearnullspace = nearnullsp; 2815 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2816 for (i=0;i<nnsp_size;i++) { 2817 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2818 } 2819 } else { /* if near null space is not provided BDDC uses constants by default */ 2820 nnsp_size = 0; 2821 nnsp_has_cnst = PETSC_TRUE; 2822 } 2823 /* get max number of constraints on a single cc */ 2824 max_constraints = nnsp_size; 2825 if (nnsp_has_cnst) max_constraints++; 2826 2827 /* 2828 Evaluate maximum storage size needed by the procedure 2829 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2830 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2831 There can be multiple constraints per connected component 2832 */ 2833 n_vertices = 0; 2834 if (ISForVertices) { 2835 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2836 } 2837 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2838 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2839 2840 total_counts = n_ISForFaces+n_ISForEdges; 2841 total_counts *= max_constraints; 2842 total_counts += n_vertices; 2843 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2844 2845 total_counts = 0; 2846 max_size_of_constraint = 0; 2847 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2848 IS used_is; 2849 if (i<n_ISForEdges) { 2850 used_is = ISForEdges[i]; 2851 } else { 2852 used_is = ISForFaces[i-n_ISForEdges]; 2853 } 2854 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2855 total_counts += j; 2856 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2857 } 2858 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2859 2860 /* get local part of global near null space vectors */ 2861 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2862 for (k=0;k<nnsp_size;k++) { 2863 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2864 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2865 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2866 } 2867 2868 /* whether or not to skip lapack calls */ 2869 skip_lapack = PETSC_TRUE; 2870 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2871 2872 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2873 if (!skip_lapack) { 2874 PetscScalar temp_work; 2875 2876 #if defined(PETSC_MISSING_LAPACK_GESVD) 2877 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2878 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2879 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2880 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2881 #if defined(PETSC_USE_COMPLEX) 2882 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2883 #endif 2884 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2885 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2886 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2887 lwork = -1; 2888 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2889 #if !defined(PETSC_USE_COMPLEX) 2890 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2891 #else 2892 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2893 #endif 2894 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2895 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2896 #else /* on missing GESVD */ 2897 /* SVD */ 2898 PetscInt max_n,min_n; 2899 max_n = max_size_of_constraint; 2900 min_n = max_constraints; 2901 if (max_size_of_constraint < max_constraints) { 2902 min_n = max_size_of_constraint; 2903 max_n = max_constraints; 2904 } 2905 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2906 #if defined(PETSC_USE_COMPLEX) 2907 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2908 #endif 2909 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2910 lwork = -1; 2911 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2912 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2913 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2914 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2915 #if !defined(PETSC_USE_COMPLEX) 2916 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2917 #else 2918 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2919 #endif 2920 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2921 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2922 #endif /* on missing GESVD */ 2923 /* Allocate optimal workspace */ 2924 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2925 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2926 } 2927 /* Now we can loop on constraining sets */ 2928 total_counts = 0; 2929 constraints_idxs_ptr[0] = 0; 2930 constraints_data_ptr[0] = 0; 2931 /* vertices */ 2932 if (n_vertices) { 2933 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2934 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2935 for (i=0;i<n_vertices;i++) { 2936 constraints_n[total_counts] = 1; 2937 constraints_data[total_counts] = 1.0; 2938 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2939 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2940 total_counts++; 2941 } 2942 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2943 n_vertices = total_counts; 2944 } 2945 2946 /* edges and faces */ 2947 total_counts_cc = total_counts; 2948 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2949 IS used_is; 2950 PetscBool idxs_copied = PETSC_FALSE; 2951 2952 if (ncc<n_ISForEdges) { 2953 used_is = ISForEdges[ncc]; 2954 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2955 } else { 2956 used_is = ISForFaces[ncc-n_ISForEdges]; 2957 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2958 } 2959 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2960 2961 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2962 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2963 /* change of basis should not be performed on local periodic nodes */ 2964 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2965 if (nnsp_has_cnst) { 2966 PetscScalar quad_value; 2967 2968 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2969 idxs_copied = PETSC_TRUE; 2970 2971 if (!pcbddc->use_nnsp_true) { 2972 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2973 } else { 2974 quad_value = 1.0; 2975 } 2976 for (j=0;j<size_of_constraint;j++) { 2977 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2978 } 2979 temp_constraints++; 2980 total_counts++; 2981 } 2982 for (k=0;k<nnsp_size;k++) { 2983 PetscReal real_value; 2984 PetscScalar *ptr_to_data; 2985 2986 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2987 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2988 for (j=0;j<size_of_constraint;j++) { 2989 ptr_to_data[j] = array[is_indices[j]]; 2990 } 2991 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2992 /* check if array is null on the connected component */ 2993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2994 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 2995 if (real_value > 0.0) { /* keep indices and values */ 2996 temp_constraints++; 2997 total_counts++; 2998 if (!idxs_copied) { 2999 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3000 idxs_copied = PETSC_TRUE; 3001 } 3002 } 3003 } 3004 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3005 valid_constraints = temp_constraints; 3006 if (!pcbddc->use_nnsp_true && temp_constraints) { 3007 if (temp_constraints == 1) { /* just normalize the constraint */ 3008 PetscScalar norm,*ptr_to_data; 3009 3010 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3012 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3013 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3014 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3015 } else { /* perform SVD */ 3016 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3017 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3018 3019 #if defined(PETSC_MISSING_LAPACK_GESVD) 3020 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3021 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3022 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3023 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3024 from that computed using LAPACKgesvd 3025 -> This is due to a different computation of eigenvectors in LAPACKheev 3026 -> The quality of the POD-computed basis will be the same */ 3027 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3028 /* Store upper triangular part of correlation matrix */ 3029 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3030 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3031 for (j=0;j<temp_constraints;j++) { 3032 for (k=0;k<j+1;k++) { 3033 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3034 } 3035 } 3036 /* compute eigenvalues and eigenvectors of correlation matrix */ 3037 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3038 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3039 #if !defined(PETSC_USE_COMPLEX) 3040 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3041 #else 3042 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3043 #endif 3044 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3045 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3046 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3047 j = 0; 3048 while (j < temp_constraints && singular_vals[j] < tol) j++; 3049 total_counts = total_counts-j; 3050 valid_constraints = temp_constraints-j; 3051 /* scale and copy POD basis into used quadrature memory */ 3052 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3053 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3054 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3055 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3056 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3057 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3058 if (j<temp_constraints) { 3059 PetscInt ii; 3060 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3061 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3062 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3063 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3064 for (k=0;k<temp_constraints-j;k++) { 3065 for (ii=0;ii<size_of_constraint;ii++) { 3066 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3067 } 3068 } 3069 } 3070 #else /* on missing GESVD */ 3071 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3072 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3073 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3074 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3075 #if !defined(PETSC_USE_COMPLEX) 3076 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3077 #else 3078 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3079 #endif 3080 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3081 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3082 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3083 k = temp_constraints; 3084 if (k > size_of_constraint) k = size_of_constraint; 3085 j = 0; 3086 while (j < k && singular_vals[k-j-1] < tol) j++; 3087 valid_constraints = k-j; 3088 total_counts = total_counts-temp_constraints+valid_constraints; 3089 #endif /* on missing GESVD */ 3090 } 3091 } 3092 /* update pointers information */ 3093 if (valid_constraints) { 3094 constraints_n[total_counts_cc] = valid_constraints; 3095 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3096 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3097 /* set change_of_basis flag */ 3098 if (boolforchange) { 3099 PetscBTSet(change_basis,total_counts_cc); 3100 } 3101 total_counts_cc++; 3102 } 3103 } 3104 /* free workspace */ 3105 if (!skip_lapack) { 3106 ierr = PetscFree(work);CHKERRQ(ierr); 3107 #if defined(PETSC_USE_COMPLEX) 3108 ierr = PetscFree(rwork);CHKERRQ(ierr); 3109 #endif 3110 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3111 #if defined(PETSC_MISSING_LAPACK_GESVD) 3112 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3113 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3114 #endif 3115 } 3116 for (k=0;k<nnsp_size;k++) { 3117 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3118 } 3119 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3120 /* free index sets of faces, edges and vertices */ 3121 for (i=0;i<n_ISForFaces;i++) { 3122 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3123 } 3124 if (n_ISForFaces) { 3125 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3126 } 3127 for (i=0;i<n_ISForEdges;i++) { 3128 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3129 } 3130 if (n_ISForEdges) { 3131 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3132 } 3133 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3134 } else { 3135 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3136 3137 total_counts = 0; 3138 n_vertices = 0; 3139 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3140 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3141 } 3142 max_constraints = 0; 3143 total_counts_cc = 0; 3144 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3145 total_counts += pcbddc->adaptive_constraints_n[i]; 3146 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3147 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3148 } 3149 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3150 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3151 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3152 constraints_data = pcbddc->adaptive_constraints_data; 3153 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3154 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3155 total_counts_cc = 0; 3156 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3157 if (pcbddc->adaptive_constraints_n[i]) { 3158 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3159 } 3160 } 3161 #if 0 3162 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3163 for (i=0;i<total_counts_cc;i++) { 3164 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3165 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3166 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3167 printf(" %d",constraints_idxs[j]); 3168 } 3169 printf("\n"); 3170 printf("number of cc: %d\n",constraints_n[i]); 3171 } 3172 for (i=0;i<n_vertices;i++) { 3173 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3174 } 3175 for (i=0;i<sub_schurs->n_subs;i++) { 3176 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3177 } 3178 #endif 3179 3180 max_size_of_constraint = 0; 3181 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3182 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3183 /* Change of basis */ 3184 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3185 if (pcbddc->use_change_of_basis) { 3186 for (i=0;i<sub_schurs->n_subs;i++) { 3187 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3188 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3189 } 3190 } 3191 } 3192 } 3193 pcbddc->local_primal_size = total_counts; 3194 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3195 3196 /* map constraints_idxs in boundary numbering */ 3197 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3198 if (i != constraints_idxs_ptr[total_counts_cc]) { 3199 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3200 } 3201 3202 /* Create constraint matrix */ 3203 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3204 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3205 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3206 3207 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3208 /* determine if a QR strategy is needed for change of basis */ 3209 qr_needed = PETSC_FALSE; 3210 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3211 total_primal_vertices=0; 3212 pcbddc->local_primal_size_cc = 0; 3213 for (i=0;i<total_counts_cc;i++) { 3214 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3215 if (size_of_constraint == 1) { 3216 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3217 pcbddc->local_primal_size_cc += 1; 3218 } else if (PetscBTLookup(change_basis,i)) { 3219 for (k=0;k<constraints_n[i];k++) { 3220 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3221 } 3222 pcbddc->local_primal_size_cc += constraints_n[i]; 3223 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3224 PetscBTSet(qr_needed_idx,i); 3225 qr_needed = PETSC_TRUE; 3226 } 3227 } else { 3228 pcbddc->local_primal_size_cc += 1; 3229 } 3230 } 3231 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3232 pcbddc->n_vertices = total_primal_vertices; 3233 /* permute indices in order to have a sorted set of vertices */ 3234 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3235 3236 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3237 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3238 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3239 3240 /* nonzero structure of constraint matrix */ 3241 /* and get reference dof for local constraints */ 3242 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3243 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3244 3245 j = total_primal_vertices; 3246 total_counts = total_primal_vertices; 3247 cum = total_primal_vertices; 3248 for (i=n_vertices;i<total_counts_cc;i++) { 3249 if (!PetscBTLookup(change_basis,i)) { 3250 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3251 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3252 cum++; 3253 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3254 for (k=0;k<constraints_n[i];k++) { 3255 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3256 nnz[j+k] = size_of_constraint; 3257 } 3258 j += constraints_n[i]; 3259 } 3260 } 3261 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3262 ierr = PetscFree(nnz);CHKERRQ(ierr); 3263 3264 /* set values in constraint matrix */ 3265 for (i=0;i<total_primal_vertices;i++) { 3266 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3267 } 3268 total_counts = total_primal_vertices; 3269 for (i=n_vertices;i<total_counts_cc;i++) { 3270 if (!PetscBTLookup(change_basis,i)) { 3271 PetscInt *cols; 3272 3273 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3274 cols = constraints_idxs+constraints_idxs_ptr[i]; 3275 for (k=0;k<constraints_n[i];k++) { 3276 PetscInt row = total_counts+k; 3277 PetscScalar *vals; 3278 3279 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3280 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3281 } 3282 total_counts += constraints_n[i]; 3283 } 3284 } 3285 /* assembling */ 3286 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3287 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3288 3289 /* 3290 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3291 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3292 */ 3293 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3294 if (pcbddc->use_change_of_basis) { 3295 /* dual and primal dofs on a single cc */ 3296 PetscInt dual_dofs,primal_dofs; 3297 /* working stuff for GEQRF */ 3298 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3299 PetscBLASInt lqr_work; 3300 /* working stuff for UNGQR */ 3301 PetscScalar *gqr_work,lgqr_work_t; 3302 PetscBLASInt lgqr_work; 3303 /* working stuff for TRTRS */ 3304 PetscScalar *trs_rhs; 3305 PetscBLASInt Blas_NRHS; 3306 /* pointers for values insertion into change of basis matrix */ 3307 PetscInt *start_rows,*start_cols; 3308 PetscScalar *start_vals; 3309 /* working stuff for values insertion */ 3310 PetscBT is_primal; 3311 PetscInt *aux_primal_numbering_B; 3312 /* matrix sizes */ 3313 PetscInt global_size,local_size; 3314 /* temporary change of basis */ 3315 Mat localChangeOfBasisMatrix; 3316 /* extra space for debugging */ 3317 PetscScalar *dbg_work; 3318 3319 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3320 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3321 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3322 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3323 /* nonzeros for local mat */ 3324 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3325 for (i=0;i<pcis->n;i++) nnz[i]=1; 3326 for (i=n_vertices;i<total_counts_cc;i++) { 3327 if (PetscBTLookup(change_basis,i)) { 3328 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3329 if (PetscBTLookup(qr_needed_idx,i)) { 3330 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3331 } else { 3332 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3333 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3334 } 3335 } 3336 } 3337 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3338 ierr = PetscFree(nnz);CHKERRQ(ierr); 3339 /* Set initial identity in the matrix */ 3340 for (i=0;i<pcis->n;i++) { 3341 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3342 } 3343 3344 if (pcbddc->dbg_flag) { 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3346 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3347 } 3348 3349 3350 /* Now we loop on the constraints which need a change of basis */ 3351 /* 3352 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3353 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3354 3355 Basic blocks of change of basis matrix T computed by 3356 3357 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3358 3359 | 1 0 ... 0 s_1/S | 3360 | 0 1 ... 0 s_2/S | 3361 | ... | 3362 | 0 ... 1 s_{n-1}/S | 3363 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3364 3365 with S = \sum_{i=1}^n s_i^2 3366 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3367 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3368 3369 - QR decomposition of constraints otherwise 3370 */ 3371 if (qr_needed) { 3372 /* space to store Q */ 3373 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3374 /* first we issue queries for optimal work */ 3375 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3376 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3377 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3378 lqr_work = -1; 3379 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3380 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3381 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3382 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3383 lgqr_work = -1; 3384 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3385 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3386 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3387 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3388 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3389 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3390 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3391 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3392 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3393 /* array to store scaling factors for reflectors */ 3394 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3395 /* array to store rhs and solution of triangular solver */ 3396 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3397 /* allocating workspace for check */ 3398 if (pcbddc->dbg_flag) { 3399 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3400 } 3401 } 3402 /* array to store whether a node is primal or not */ 3403 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3404 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3405 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3406 if (i != total_primal_vertices) { 3407 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3408 } 3409 for (i=0;i<total_primal_vertices;i++) { 3410 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3411 } 3412 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3413 3414 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3415 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3416 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3417 if (PetscBTLookup(change_basis,total_counts)) { 3418 /* get constraint info */ 3419 primal_dofs = constraints_n[total_counts]; 3420 dual_dofs = size_of_constraint-primal_dofs; 3421 3422 if (pcbddc->dbg_flag) { 3423 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3424 } 3425 3426 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3427 3428 /* copy quadrature constraints for change of basis check */ 3429 if (pcbddc->dbg_flag) { 3430 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3431 } 3432 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3433 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3434 3435 /* compute QR decomposition of constraints */ 3436 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3437 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3438 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3439 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3440 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3441 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3442 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3443 3444 /* explictly compute R^-T */ 3445 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3446 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3447 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3448 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3449 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3450 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3451 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3452 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3453 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3454 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3455 3456 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3457 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3458 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3459 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3461 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3462 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3463 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3464 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3465 3466 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3467 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3468 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3469 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3470 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3471 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3472 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3473 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3474 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3475 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3476 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3477 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3478 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3479 3480 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3481 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3482 /* insert cols for primal dofs */ 3483 for (j=0;j<primal_dofs;j++) { 3484 start_vals = &qr_basis[j*size_of_constraint]; 3485 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3486 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3487 } 3488 /* insert cols for dual dofs */ 3489 for (j=0,k=0;j<dual_dofs;k++) { 3490 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3491 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3492 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3493 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3494 j++; 3495 } 3496 } 3497 3498 /* check change of basis */ 3499 if (pcbddc->dbg_flag) { 3500 PetscInt ii,jj; 3501 PetscBool valid_qr=PETSC_TRUE; 3502 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3503 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3504 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3506 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3507 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3508 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3509 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3511 for (jj=0;jj<size_of_constraint;jj++) { 3512 for (ii=0;ii<primal_dofs;ii++) { 3513 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3514 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3515 } 3516 } 3517 if (!valid_qr) { 3518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3519 for (jj=0;jj<size_of_constraint;jj++) { 3520 for (ii=0;ii<primal_dofs;ii++) { 3521 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3522 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3523 } 3524 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3525 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3526 } 3527 } 3528 } 3529 } else { 3530 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3531 } 3532 } 3533 } else { /* simple transformation block */ 3534 PetscInt row,col; 3535 PetscScalar val,norm; 3536 3537 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3538 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3539 for (j=0;j<size_of_constraint;j++) { 3540 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3541 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3542 if (!PetscBTLookup(is_primal,row_B)) { 3543 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3544 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3545 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3546 } else { 3547 for (k=0;k<size_of_constraint;k++) { 3548 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3549 if (row != col) { 3550 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3551 } else { 3552 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3553 } 3554 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3555 } 3556 } 3557 } 3558 if (pcbddc->dbg_flag) { 3559 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3560 } 3561 } 3562 } else { 3563 if (pcbddc->dbg_flag) { 3564 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3565 } 3566 } 3567 } 3568 3569 /* free workspace */ 3570 if (qr_needed) { 3571 if (pcbddc->dbg_flag) { 3572 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3573 } 3574 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3575 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3576 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3577 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3578 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3579 } 3580 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3581 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3582 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3583 3584 /* assembling of global change of variable */ 3585 { 3586 Mat tmat; 3587 PetscInt bs; 3588 3589 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3590 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3591 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3592 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3593 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3594 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3595 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3596 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3597 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3598 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3599 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3600 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3601 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3602 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3603 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3604 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3605 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3606 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3607 } 3608 /* check */ 3609 if (pcbddc->dbg_flag) { 3610 PetscReal error; 3611 Vec x,x_change; 3612 3613 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3614 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3615 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3616 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3617 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3618 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3619 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3620 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3621 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3622 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3623 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3624 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3626 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3627 ierr = VecDestroy(&x);CHKERRQ(ierr); 3628 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3629 } 3630 3631 /* adapt sub_schurs computed (if any) */ 3632 if (pcbddc->use_deluxe_scaling) { 3633 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3634 if (sub_schurs->S_Ej_all) { 3635 Mat S_new,tmat; 3636 ISLocalToGlobalMapping NtoSall; 3637 IS is_all_N,is_V,is_V_Sall; 3638 const PetscScalar *array; 3639 const PetscInt *idxs_V,*idxs_all; 3640 PetscInt i,n_V; 3641 3642 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3643 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3644 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3645 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3646 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3647 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3648 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3649 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3650 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3651 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3652 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3653 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3654 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3655 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3656 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3657 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3658 for (i=0;i<n_V;i++) { 3659 PetscScalar val; 3660 PetscInt idx; 3661 3662 idx = idxs_V[i]; 3663 val = array[idxs_all[idxs_V[i]]]; 3664 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3665 } 3666 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3667 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3668 sub_schurs->S_Ej_all = S_new; 3669 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3670 if (sub_schurs->sum_S_Ej_all) { 3671 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3672 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3673 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3674 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3675 sub_schurs->sum_S_Ej_all = S_new; 3676 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3677 } 3678 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3679 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3680 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3681 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3682 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3683 } 3684 } 3685 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3686 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3687 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3688 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3689 } 3690 3691 /* set up change of basis context */ 3692 if (pcbddc->ChangeOfBasisMatrix) { 3693 PCBDDCChange_ctx change_ctx; 3694 3695 if (!pcbddc->new_global_mat) { 3696 PetscInt global_size,local_size; 3697 3698 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3699 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3700 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3701 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3702 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3703 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3704 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3705 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3706 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3707 } else { 3708 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3709 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3710 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3711 } 3712 if (!pcbddc->user_ChangeOfBasisMatrix) { 3713 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3714 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3715 } else { 3716 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3717 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3718 } 3719 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3720 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3721 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3722 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3723 } 3724 3725 /* add pressure dofs to set of primal nodes for numbering purposes */ 3726 for (i=0;i<pcbddc->benign_n;i++) { 3727 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3728 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3729 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3730 pcbddc->local_primal_size_cc++; 3731 pcbddc->local_primal_size++; 3732 } 3733 3734 /* check if a new primal space has been introduced (also take into account benign trick) */ 3735 pcbddc->new_primal_space_local = PETSC_TRUE; 3736 if (olocal_primal_size == pcbddc->local_primal_size) { 3737 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3738 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3739 if (!pcbddc->new_primal_space_local) { 3740 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3741 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3742 } 3743 } 3744 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3745 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3746 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3747 3748 /* flush dbg viewer */ 3749 if (pcbddc->dbg_flag) { 3750 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3751 } 3752 3753 /* free workspace */ 3754 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3755 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3756 if (!pcbddc->adaptive_selection) { 3757 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3758 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3759 } else { 3760 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3761 pcbddc->adaptive_constraints_idxs_ptr, 3762 pcbddc->adaptive_constraints_data_ptr, 3763 pcbddc->adaptive_constraints_idxs, 3764 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3765 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3766 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3767 } 3768 PetscFunctionReturn(0); 3769 } 3770 3771 #undef __FUNCT__ 3772 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3773 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3774 { 3775 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3776 PC_IS *pcis = (PC_IS*)pc->data; 3777 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3778 PetscInt ierr,i,vertex_size,N; 3779 PetscViewer viewer=pcbddc->dbg_viewer; 3780 3781 PetscFunctionBegin; 3782 /* Reset previously computed graph */ 3783 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3784 /* Init local Graph struct */ 3785 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3786 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3787 3788 /* Check validity of the csr graph passed in by the user */ 3789 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3790 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3791 } 3792 3793 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3794 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3795 PetscInt *xadj,*adjncy; 3796 PetscInt nvtxs; 3797 PetscBool flg_row=PETSC_FALSE; 3798 3799 if (pcbddc->use_local_adj) { 3800 3801 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3802 if (flg_row) { 3803 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3804 pcbddc->computed_rowadj = PETSC_TRUE; 3805 } 3806 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3807 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3808 IS is_dummy; 3809 ISLocalToGlobalMapping l2gmap_dummy; 3810 PetscInt j,sum; 3811 PetscInt *cxadj,*cadjncy; 3812 const PetscInt *idxs; 3813 PCBDDCGraph graph; 3814 PetscBT is_on_boundary; 3815 3816 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3817 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3818 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3819 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3820 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3821 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3822 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3823 if (flg_row) { 3824 graph->xadj = xadj; 3825 graph->adjncy = adjncy; 3826 } 3827 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3828 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3830 3831 if (pcbddc->dbg_flag) { 3832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3833 for (i=0;i<graph->ncc;i++) { 3834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3835 } 3836 } 3837 3838 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3839 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3840 for (i=0;i<pcis->n_B;i++) { 3841 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3842 } 3843 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3844 3845 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3846 sum = 0; 3847 for (i=0;i<graph->ncc;i++) { 3848 PetscInt sizecc = 0; 3849 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3850 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3851 sizecc++; 3852 } 3853 } 3854 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3855 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3856 cxadj[graph->queue[j]] = sizecc; 3857 } 3858 } 3859 sum += sizecc*sizecc; 3860 } 3861 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3862 sum = 0; 3863 for (i=0;i<pcis->n;i++) { 3864 PetscInt temp = cxadj[i]; 3865 cxadj[i] = sum; 3866 sum += temp; 3867 } 3868 cxadj[pcis->n] = sum; 3869 for (i=0;i<graph->ncc;i++) { 3870 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3871 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3872 PetscInt k,sizecc = 0; 3873 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3874 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3875 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3876 sizecc++; 3877 } 3878 } 3879 } 3880 } 3881 } 3882 if (sum) { 3883 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3884 } else { 3885 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3886 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3887 } 3888 graph->xadj = 0; 3889 graph->adjncy = 0; 3890 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3891 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3892 } 3893 } 3894 if (pcbddc->dbg_flag) { 3895 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3896 } 3897 3898 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3899 vertex_size = 1; 3900 if (pcbddc->user_provided_isfordofs) { 3901 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3902 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3903 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3904 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3905 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3906 } 3907 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3908 pcbddc->n_ISForDofs = 0; 3909 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3910 } 3911 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3912 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3913 } else { 3914 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3915 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3916 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3917 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3918 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3919 } 3920 } 3921 } 3922 3923 /* Setup of Graph */ 3924 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3925 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3926 } 3927 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3928 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3929 } 3930 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 3931 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3932 } 3933 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3934 3935 /* attach info on disconnected subdomains if present */ 3936 if (pcbddc->n_local_subs) { 3937 PetscInt *local_subs; 3938 3939 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3940 for (i=0;i<pcbddc->n_local_subs;i++) { 3941 const PetscInt *idxs; 3942 PetscInt nl,j; 3943 3944 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3945 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3946 for (j=0;j<nl;j++) { 3947 local_subs[idxs[j]] = i; 3948 } 3949 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3950 } 3951 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3952 pcbddc->mat_graph->local_subs = local_subs; 3953 } 3954 3955 /* Graph's connected components analysis */ 3956 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3957 3958 /* print some info to stdout */ 3959 if (pcbddc->dbg_flag) { 3960 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3961 } 3962 3963 /* mark topography has done */ 3964 pcbddc->recompute_topography = PETSC_FALSE; 3965 PetscFunctionReturn(0); 3966 } 3967 3968 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "PCBDDCSubsetNumbering" 3971 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3972 { 3973 PetscSF sf; 3974 PetscLayout map; 3975 const PetscInt *idxs; 3976 PetscInt *leaf_data,*root_data,*gidxs; 3977 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3978 PetscInt n_n,nlocals,start,first_index; 3979 PetscMPIInt commsize; 3980 PetscBool first_found; 3981 PetscErrorCode ierr; 3982 3983 PetscFunctionBegin; 3984 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3985 if (subset_mult) { 3986 PetscCheckSameComm(subset,1,subset_mult,2); 3987 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3988 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3989 } 3990 /* create workspace layout for computing global indices of subset */ 3991 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 3992 lbounds[0] = lbounds[1] = 0; 3993 for (i=0;i<n;i++) { 3994 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 3995 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 3996 } 3997 lbounds[0] = -lbounds[0]; 3998 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 3999 gbounds[0] = -gbounds[0]; 4000 N = gbounds[1] - gbounds[0] + 1; 4001 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4002 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4003 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4004 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4005 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4006 4007 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4008 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4009 if (subset_mult) { 4010 const PetscInt* idxs_mult; 4011 4012 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4013 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4014 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4015 } else { 4016 for (i=0;i<n;i++) leaf_data[i] = 1; 4017 } 4018 /* local size of new subset */ 4019 n_n = 0; 4020 for (i=0;i<n;i++) n_n += leaf_data[i]; 4021 4022 /* global indexes in layout */ 4023 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4024 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4025 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4026 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4027 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4028 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4029 4030 /* reduce from leaves to roots */ 4031 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4032 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4033 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4034 4035 /* count indexes in local part of layout */ 4036 nlocals = 0; 4037 first_index = -1; 4038 first_found = PETSC_FALSE; 4039 for (i=0;i<Nl;i++) { 4040 if (!first_found && root_data[i]) { 4041 first_found = PETSC_TRUE; 4042 first_index = i; 4043 } 4044 nlocals += root_data[i]; 4045 } 4046 4047 /* cumulative of number of indexes and size of subset without holes */ 4048 #if defined(PETSC_HAVE_MPI_EXSCAN) 4049 start = 0; 4050 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4051 #else 4052 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4053 start = start-nlocals; 4054 #endif 4055 4056 if (N_n) { /* compute total size of new subset if requested */ 4057 *N_n = start + nlocals; 4058 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4059 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4060 } 4061 4062 /* adapt root data with cumulative */ 4063 if (first_found) { 4064 PetscInt old_index; 4065 4066 root_data[first_index] += start; 4067 old_index = first_index; 4068 for (i=first_index+1;i<Nl;i++) { 4069 if (root_data[i]) { 4070 root_data[i] += root_data[old_index]; 4071 old_index = i; 4072 } 4073 } 4074 } 4075 4076 /* from roots to leaves */ 4077 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4078 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4079 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4080 4081 /* create new IS with global indexes without holes */ 4082 if (subset_mult) { 4083 const PetscInt* idxs_mult; 4084 PetscInt cum; 4085 4086 cum = 0; 4087 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4088 for (i=0;i<n;i++) { 4089 PetscInt j; 4090 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4091 } 4092 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4093 } else { 4094 for (i=0;i<n;i++) { 4095 gidxs[i] = leaf_data[i]-1; 4096 } 4097 } 4098 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4099 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4100 PetscFunctionReturn(0); 4101 } 4102 4103 #undef __FUNCT__ 4104 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4105 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4106 { 4107 PetscInt i,j; 4108 PetscScalar *alphas; 4109 PetscErrorCode ierr; 4110 4111 PetscFunctionBegin; 4112 /* this implements stabilized Gram-Schmidt */ 4113 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4114 for (i=0;i<n;i++) { 4115 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4116 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4117 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4118 } 4119 ierr = PetscFree(alphas);CHKERRQ(ierr); 4120 PetscFunctionReturn(0); 4121 } 4122 4123 #undef __FUNCT__ 4124 #define __FUNCT__ "MatISGetSubassemblingPattern" 4125 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4126 { 4127 IS ranks_send_to; 4128 PetscInt n_neighs,*neighs,*n_shared,**shared; 4129 PetscMPIInt size,rank,color; 4130 PetscInt *xadj,*adjncy; 4131 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4132 PetscInt i,local_size,threshold=0; 4133 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4134 PetscSubcomm subcomm; 4135 PetscErrorCode ierr; 4136 4137 PetscFunctionBegin; 4138 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4139 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4140 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4141 4142 /* Get info on mapping */ 4143 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4144 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4145 4146 /* build local CSR graph of subdomains' connectivity */ 4147 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4148 xadj[0] = 0; 4149 xadj[1] = PetscMax(n_neighs-1,0); 4150 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4151 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4152 4153 if (threshold) { 4154 PetscInt xadj_count = 0; 4155 for (i=1;i<n_neighs;i++) { 4156 if (n_shared[i] > threshold) { 4157 adjncy[xadj_count] = neighs[i]; 4158 adjncy_wgt[xadj_count] = n_shared[i]; 4159 xadj_count++; 4160 } 4161 } 4162 xadj[1] = xadj_count; 4163 } else { 4164 if (xadj[1]) { 4165 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4166 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4167 } 4168 } 4169 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4170 if (use_square) { 4171 for (i=0;i<xadj[1];i++) { 4172 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4173 } 4174 } 4175 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4176 4177 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4178 4179 /* 4180 Restrict work on active processes only. 4181 */ 4182 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4183 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4184 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4185 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4186 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4187 if (color) { 4188 ierr = PetscFree(xadj);CHKERRQ(ierr); 4189 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4190 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4191 } else { 4192 Mat subdomain_adj; 4193 IS new_ranks,new_ranks_contig; 4194 MatPartitioning partitioner; 4195 PetscInt prank,rstart=0,rend=0; 4196 PetscInt *is_indices,*oldranks; 4197 PetscBool aggregate; 4198 4199 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4200 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4201 prank = rank; 4202 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4203 /* 4204 for (i=0;i<size;i++) { 4205 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4206 } 4207 */ 4208 for (i=0;i<xadj[1];i++) { 4209 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4210 } 4211 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4212 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4213 if (aggregate) { 4214 PetscInt lrows,row,ncols,*cols; 4215 PetscMPIInt nrank; 4216 PetscScalar *vals; 4217 4218 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4219 lrows = 0; 4220 if (nrank<redprocs) { 4221 lrows = size/redprocs; 4222 if (nrank<size%redprocs) lrows++; 4223 } 4224 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4225 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4226 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4227 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4228 row = nrank; 4229 ncols = xadj[1]-xadj[0]; 4230 cols = adjncy; 4231 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4232 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4233 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4234 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4235 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4236 ierr = PetscFree(xadj);CHKERRQ(ierr); 4237 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4238 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4239 ierr = PetscFree(vals);CHKERRQ(ierr); 4240 } else { 4241 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4242 } 4243 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4244 4245 /* Partition */ 4246 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4247 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4248 if (use_vwgt) { 4249 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4250 v_wgt[0] = local_size; 4251 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4252 } 4253 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4254 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4255 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4256 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4257 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4258 4259 /* renumber new_ranks to avoid "holes" in new set of processors */ 4260 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4261 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4262 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4263 if (!redprocs) { 4264 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4265 } else { 4266 PetscInt idxs[1]; 4267 PetscMPIInt tag; 4268 MPI_Request *reqs; 4269 4270 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4271 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4272 for (i=rstart;i<rend;i++) { 4273 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4274 } 4275 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4276 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4277 ierr = PetscFree(reqs);CHKERRQ(ierr); 4278 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4279 } 4280 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4281 /* clean up */ 4282 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4283 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4284 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4285 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4286 } 4287 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4288 4289 /* assemble parallel IS for sends */ 4290 i = 1; 4291 if (color) i=0; 4292 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4293 /* get back IS */ 4294 *is_sends = ranks_send_to; 4295 PetscFunctionReturn(0); 4296 } 4297 4298 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4299 4300 #undef __FUNCT__ 4301 #define __FUNCT__ "MatISSubassemble" 4302 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4303 { 4304 Mat local_mat; 4305 IS is_sends_internal; 4306 PetscInt rows,cols,new_local_rows; 4307 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4308 PetscBool ismatis,isdense,newisdense,destroy_mat; 4309 ISLocalToGlobalMapping l2gmap; 4310 PetscInt* l2gmap_indices; 4311 const PetscInt* is_indices; 4312 MatType new_local_type; 4313 /* buffers */ 4314 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4315 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4316 PetscInt *recv_buffer_idxs_local; 4317 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4318 /* MPI */ 4319 MPI_Comm comm,comm_n; 4320 PetscSubcomm subcomm; 4321 PetscMPIInt n_sends,n_recvs,commsize; 4322 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4323 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4324 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4325 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4326 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4327 PetscErrorCode ierr; 4328 4329 PetscFunctionBegin; 4330 /* TODO: add missing checks */ 4331 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4332 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4333 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4334 PetscValidLogicalCollectiveInt(mat,nis,7); 4335 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4336 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4337 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4338 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4339 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4340 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4341 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4342 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4343 PetscInt mrows,mcols,mnrows,mncols; 4344 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4345 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4346 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4347 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4348 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4349 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4350 } 4351 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4352 PetscValidLogicalCollectiveInt(mat,bs,0); 4353 /* prepare IS for sending if not provided */ 4354 if (!is_sends) { 4355 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4356 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4357 } else { 4358 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4359 is_sends_internal = is_sends; 4360 } 4361 4362 /* get comm */ 4363 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4364 4365 /* compute number of sends */ 4366 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4367 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4368 4369 /* compute number of receives */ 4370 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4371 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4372 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4373 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4374 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4375 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4376 ierr = PetscFree(iflags);CHKERRQ(ierr); 4377 4378 /* restrict comm if requested */ 4379 subcomm = 0; 4380 destroy_mat = PETSC_FALSE; 4381 if (restrict_comm) { 4382 PetscMPIInt color,subcommsize; 4383 4384 color = 0; 4385 if (restrict_full) { 4386 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4387 } else { 4388 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4389 } 4390 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4391 subcommsize = commsize - subcommsize; 4392 /* check if reuse has been requested */ 4393 if (reuse == MAT_REUSE_MATRIX) { 4394 if (*mat_n) { 4395 PetscMPIInt subcommsize2; 4396 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4397 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4398 comm_n = PetscObjectComm((PetscObject)*mat_n); 4399 } else { 4400 comm_n = PETSC_COMM_SELF; 4401 } 4402 } else { /* MAT_INITIAL_MATRIX */ 4403 PetscMPIInt rank; 4404 4405 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4406 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4407 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4408 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4409 comm_n = PetscSubcommChild(subcomm); 4410 } 4411 /* flag to destroy *mat_n if not significative */ 4412 if (color) destroy_mat = PETSC_TRUE; 4413 } else { 4414 comm_n = comm; 4415 } 4416 4417 /* prepare send/receive buffers */ 4418 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4419 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4420 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4421 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4422 if (nis) { 4423 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4424 } 4425 4426 /* Get data from local matrices */ 4427 if (!isdense) { 4428 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4429 /* TODO: See below some guidelines on how to prepare the local buffers */ 4430 /* 4431 send_buffer_vals should contain the raw values of the local matrix 4432 send_buffer_idxs should contain: 4433 - MatType_PRIVATE type 4434 - PetscInt size_of_l2gmap 4435 - PetscInt global_row_indices[size_of_l2gmap] 4436 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4437 */ 4438 } else { 4439 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4440 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4441 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4442 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4443 send_buffer_idxs[1] = i; 4444 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4445 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4446 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4447 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4448 for (i=0;i<n_sends;i++) { 4449 ilengths_vals[is_indices[i]] = len*len; 4450 ilengths_idxs[is_indices[i]] = len+2; 4451 } 4452 } 4453 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4454 /* additional is (if any) */ 4455 if (nis) { 4456 PetscMPIInt psum; 4457 PetscInt j; 4458 for (j=0,psum=0;j<nis;j++) { 4459 PetscInt plen; 4460 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4461 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4462 psum += len+1; /* indices + lenght */ 4463 } 4464 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4465 for (j=0,psum=0;j<nis;j++) { 4466 PetscInt plen; 4467 const PetscInt *is_array_idxs; 4468 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4469 send_buffer_idxs_is[psum] = plen; 4470 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4471 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4472 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4473 psum += plen+1; /* indices + lenght */ 4474 } 4475 for (i=0;i<n_sends;i++) { 4476 ilengths_idxs_is[is_indices[i]] = psum; 4477 } 4478 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4479 } 4480 4481 buf_size_idxs = 0; 4482 buf_size_vals = 0; 4483 buf_size_idxs_is = 0; 4484 for (i=0;i<n_recvs;i++) { 4485 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4486 buf_size_vals += (PetscInt)olengths_vals[i]; 4487 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4488 } 4489 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4490 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4491 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4492 4493 /* get new tags for clean communications */ 4494 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4495 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4496 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4497 4498 /* allocate for requests */ 4499 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4500 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4501 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4502 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4503 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4504 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4505 4506 /* communications */ 4507 ptr_idxs = recv_buffer_idxs; 4508 ptr_vals = recv_buffer_vals; 4509 ptr_idxs_is = recv_buffer_idxs_is; 4510 for (i=0;i<n_recvs;i++) { 4511 source_dest = onodes[i]; 4512 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4513 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4514 ptr_idxs += olengths_idxs[i]; 4515 ptr_vals += olengths_vals[i]; 4516 if (nis) { 4517 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4518 ptr_idxs_is += olengths_idxs_is[i]; 4519 } 4520 } 4521 for (i=0;i<n_sends;i++) { 4522 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4523 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4524 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4525 if (nis) { 4526 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4527 } 4528 } 4529 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4530 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4531 4532 /* assemble new l2g map */ 4533 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4534 ptr_idxs = recv_buffer_idxs; 4535 new_local_rows = 0; 4536 for (i=0;i<n_recvs;i++) { 4537 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4538 ptr_idxs += olengths_idxs[i]; 4539 } 4540 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4541 ptr_idxs = recv_buffer_idxs; 4542 new_local_rows = 0; 4543 for (i=0;i<n_recvs;i++) { 4544 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4545 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4546 ptr_idxs += olengths_idxs[i]; 4547 } 4548 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4549 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4550 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4551 4552 /* infer new local matrix type from received local matrices type */ 4553 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4554 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4555 if (n_recvs) { 4556 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4557 ptr_idxs = recv_buffer_idxs; 4558 for (i=0;i<n_recvs;i++) { 4559 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4560 new_local_type_private = MATAIJ_PRIVATE; 4561 break; 4562 } 4563 ptr_idxs += olengths_idxs[i]; 4564 } 4565 switch (new_local_type_private) { 4566 case MATDENSE_PRIVATE: 4567 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4568 new_local_type = MATSEQAIJ; 4569 bs = 1; 4570 } else { /* if I receive only 1 dense matrix */ 4571 new_local_type = MATSEQDENSE; 4572 bs = 1; 4573 } 4574 break; 4575 case MATAIJ_PRIVATE: 4576 new_local_type = MATSEQAIJ; 4577 bs = 1; 4578 break; 4579 case MATBAIJ_PRIVATE: 4580 new_local_type = MATSEQBAIJ; 4581 break; 4582 case MATSBAIJ_PRIVATE: 4583 new_local_type = MATSEQSBAIJ; 4584 break; 4585 default: 4586 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4587 break; 4588 } 4589 } else { /* by default, new_local_type is seqdense */ 4590 new_local_type = MATSEQDENSE; 4591 bs = 1; 4592 } 4593 4594 /* create MATIS object if needed */ 4595 if (reuse == MAT_INITIAL_MATRIX) { 4596 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4597 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4598 } else { 4599 /* it also destroys the local matrices */ 4600 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4601 } 4602 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4603 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4604 4605 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4606 4607 /* Global to local map of received indices */ 4608 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4609 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4610 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4611 4612 /* restore attributes -> type of incoming data and its size */ 4613 buf_size_idxs = 0; 4614 for (i=0;i<n_recvs;i++) { 4615 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4616 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4617 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4618 } 4619 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4620 4621 /* set preallocation */ 4622 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4623 if (!newisdense) { 4624 PetscInt *new_local_nnz=0; 4625 4626 ptr_vals = recv_buffer_vals; 4627 ptr_idxs = recv_buffer_idxs_local; 4628 if (n_recvs) { 4629 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4630 } 4631 for (i=0;i<n_recvs;i++) { 4632 PetscInt j; 4633 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4634 for (j=0;j<*(ptr_idxs+1);j++) { 4635 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4636 } 4637 } else { 4638 /* TODO */ 4639 } 4640 ptr_idxs += olengths_idxs[i]; 4641 } 4642 if (new_local_nnz) { 4643 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4644 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4645 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4646 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4647 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4648 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4649 } else { 4650 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4651 } 4652 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4653 } else { 4654 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4655 } 4656 4657 /* set values */ 4658 ptr_vals = recv_buffer_vals; 4659 ptr_idxs = recv_buffer_idxs_local; 4660 for (i=0;i<n_recvs;i++) { 4661 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4662 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4663 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4664 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4665 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4666 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4667 } else { 4668 /* TODO */ 4669 } 4670 ptr_idxs += olengths_idxs[i]; 4671 ptr_vals += olengths_vals[i]; 4672 } 4673 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4676 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4677 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4678 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4679 4680 #if 0 4681 if (!restrict_comm) { /* check */ 4682 Vec lvec,rvec; 4683 PetscReal infty_error; 4684 4685 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4686 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4687 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4688 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4689 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4690 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4691 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4692 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4693 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4694 } 4695 #endif 4696 4697 /* assemble new additional is (if any) */ 4698 if (nis) { 4699 PetscInt **temp_idxs,*count_is,j,psum; 4700 4701 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4702 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4703 ptr_idxs = recv_buffer_idxs_is; 4704 psum = 0; 4705 for (i=0;i<n_recvs;i++) { 4706 for (j=0;j<nis;j++) { 4707 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4708 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4709 psum += plen; 4710 ptr_idxs += plen+1; /* shift pointer to received data */ 4711 } 4712 } 4713 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4714 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4715 for (i=1;i<nis;i++) { 4716 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4717 } 4718 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4719 ptr_idxs = recv_buffer_idxs_is; 4720 for (i=0;i<n_recvs;i++) { 4721 for (j=0;j<nis;j++) { 4722 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4723 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4724 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4725 ptr_idxs += plen+1; /* shift pointer to received data */ 4726 } 4727 } 4728 for (i=0;i<nis;i++) { 4729 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4730 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4731 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4732 } 4733 ierr = PetscFree(count_is);CHKERRQ(ierr); 4734 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4735 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4736 } 4737 /* free workspace */ 4738 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4739 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4740 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4741 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4742 if (isdense) { 4743 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4744 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4745 } else { 4746 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4747 } 4748 if (nis) { 4749 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4750 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4751 } 4752 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4753 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4754 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4755 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4756 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4757 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4758 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4759 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4760 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4761 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4762 ierr = PetscFree(onodes);CHKERRQ(ierr); 4763 if (nis) { 4764 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4765 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4766 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4767 } 4768 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4769 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4770 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4771 for (i=0;i<nis;i++) { 4772 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4773 } 4774 *mat_n = NULL; 4775 } 4776 PetscFunctionReturn(0); 4777 } 4778 4779 /* temporary hack into ksp private data structure */ 4780 #include <petsc/private/kspimpl.h> 4781 4782 #undef __FUNCT__ 4783 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4784 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4785 { 4786 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4787 PC_IS *pcis = (PC_IS*)pc->data; 4788 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4789 MatNullSpace CoarseNullSpace=NULL; 4790 ISLocalToGlobalMapping coarse_islg; 4791 IS coarse_is,*isarray; 4792 PetscInt i,im_active=-1,active_procs=-1; 4793 PetscInt nis,nisdofs,nisneu,nisvert; 4794 PC pc_temp; 4795 PCType coarse_pc_type; 4796 KSPType coarse_ksp_type; 4797 PetscBool multilevel_requested,multilevel_allowed; 4798 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4799 Mat t_coarse_mat_is; 4800 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4801 PetscMPIInt all_procs; 4802 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4803 PetscBool compute_vecs = PETSC_FALSE; 4804 PetscScalar *array; 4805 PetscErrorCode ierr; 4806 4807 PetscFunctionBegin; 4808 /* Assign global numbering to coarse dofs */ 4809 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4810 PetscInt ocoarse_size; 4811 compute_vecs = PETSC_TRUE; 4812 ocoarse_size = pcbddc->coarse_size; 4813 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4814 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4815 /* see if we can avoid some work */ 4816 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4817 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4818 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4819 PC pc; 4820 PetscBool isbddc; 4821 4822 /* temporary workaround since PCBDDC does not have a reset method so far */ 4823 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4824 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4825 if (isbddc) { 4826 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4827 } 4828 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4829 coarse_reuse = PETSC_FALSE; 4830 } else { /* we can safely reuse already computed coarse matrix */ 4831 coarse_reuse = PETSC_TRUE; 4832 } 4833 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4834 coarse_reuse = PETSC_FALSE; 4835 } 4836 /* reset any subassembling information */ 4837 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4838 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4839 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4840 coarse_reuse = PETSC_TRUE; 4841 } 4842 4843 /* count "active" (i.e. with positive local size) and "void" processes */ 4844 im_active = !!(pcis->n); 4845 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4846 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4847 void_procs = all_procs-active_procs; 4848 csin_type_simple = PETSC_TRUE; 4849 redist = PETSC_FALSE; 4850 if (pcbddc->current_level && void_procs) { 4851 csin_ml = PETSC_TRUE; 4852 ncoarse_ml = void_procs; 4853 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4854 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4855 csin_ds = PETSC_TRUE; 4856 ncoarse_ds = pcbddc->redistribute_coarse; 4857 redist = PETSC_TRUE; 4858 } else { 4859 csin_ds = PETSC_TRUE; 4860 ncoarse_ds = active_procs; 4861 redist = PETSC_TRUE; 4862 } 4863 } else { 4864 csin_ml = PETSC_FALSE; 4865 ncoarse_ml = all_procs; 4866 if (void_procs) { 4867 csin_ds = PETSC_TRUE; 4868 ncoarse_ds = void_procs; 4869 csin_type_simple = PETSC_FALSE; 4870 } else { 4871 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4872 csin_ds = PETSC_TRUE; 4873 ncoarse_ds = pcbddc->redistribute_coarse; 4874 redist = PETSC_TRUE; 4875 } else { 4876 csin_ds = PETSC_FALSE; 4877 ncoarse_ds = all_procs; 4878 } 4879 } 4880 } 4881 4882 /* 4883 test if we can go multilevel: three conditions must be satisfied: 4884 - we have not exceeded the number of levels requested 4885 - we can actually subassemble the active processes 4886 - we can find a suitable number of MPI processes where we can place the subassembled problem 4887 */ 4888 multilevel_allowed = PETSC_FALSE; 4889 multilevel_requested = PETSC_FALSE; 4890 if (pcbddc->current_level < pcbddc->max_levels) { 4891 multilevel_requested = PETSC_TRUE; 4892 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4893 multilevel_allowed = PETSC_FALSE; 4894 } else { 4895 multilevel_allowed = PETSC_TRUE; 4896 } 4897 } 4898 /* determine number of process partecipating to coarse solver */ 4899 if (multilevel_allowed) { 4900 ncoarse = ncoarse_ml; 4901 csin = csin_ml; 4902 redist = PETSC_FALSE; 4903 } else { 4904 ncoarse = ncoarse_ds; 4905 csin = csin_ds; 4906 } 4907 4908 /* creates temporary l2gmap and IS for coarse indexes */ 4909 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4910 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4911 4912 /* creates temporary MATIS object for coarse matrix */ 4913 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4914 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4915 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4916 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4917 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4918 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4919 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4920 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4921 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4922 4923 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4924 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 4925 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4926 const PetscInt *idxs; 4927 ISLocalToGlobalMapping tmap; 4928 4929 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4930 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4931 /* allocate space for temporary storage */ 4932 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4933 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4934 /* allocate for IS array */ 4935 nisdofs = pcbddc->n_ISForDofsLocal; 4936 nisneu = !!pcbddc->NeumannBoundariesLocal; 4937 nisvert = 0; 4938 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4939 nisvert = 1; 4940 } 4941 nis = nisdofs + nisneu + nisvert; 4942 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4943 /* dofs splitting */ 4944 for (i=0;i<nisdofs;i++) { 4945 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4946 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4947 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4948 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4949 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4950 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4951 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4952 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4953 } 4954 /* neumann boundaries */ 4955 if (pcbddc->NeumannBoundariesLocal) { 4956 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4957 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4958 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4959 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4960 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4961 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4962 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4963 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4964 } 4965 /* primal vertices (benign) */ 4966 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4967 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 4968 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4969 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4970 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4971 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4972 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 4973 } 4974 /* free memory */ 4975 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4976 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4977 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4978 } else { 4979 nis = 0; 4980 nisdofs = 0; 4981 nisneu = 0; 4982 nisvert = 0; 4983 isarray = NULL; 4984 } 4985 /* destroy no longer needed map */ 4986 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4987 4988 /* restrict on coarse candidates (if needed) */ 4989 coarse_mat_is = NULL; 4990 if (csin) { 4991 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 4992 if (redist) { 4993 PetscMPIInt rank; 4994 PetscInt spc,n_spc_p1,dest[1],destsize; 4995 4996 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 4997 spc = active_procs/ncoarse; 4998 n_spc_p1 = active_procs%ncoarse; 4999 if (im_active) { 5000 destsize = 1; 5001 if (rank > n_spc_p1*(spc+1)-1) { 5002 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5003 } else { 5004 dest[0] = rank/(spc+1); 5005 } 5006 } else { 5007 destsize = 0; 5008 } 5009 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5010 } else if (csin_type_simple) { 5011 PetscMPIInt rank; 5012 PetscInt issize,isidx; 5013 5014 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5015 if (im_active) { 5016 issize = 1; 5017 isidx = (PetscInt)rank; 5018 } else { 5019 issize = 0; 5020 isidx = -1; 5021 } 5022 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5023 } else { /* get a suitable subassembling pattern from MATIS code */ 5024 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5025 } 5026 5027 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5028 if (!redist || ncoarse <= void_procs) { 5029 PetscInt ncoarse_cand,tissize,*nisindices; 5030 PetscInt *coarse_candidates; 5031 const PetscInt* tisindices; 5032 5033 /* get coarse candidates' ranks in pc communicator */ 5034 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5035 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5036 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5037 if (!coarse_candidates[i]) { 5038 coarse_candidates[ncoarse_cand++]=i; 5039 } 5040 } 5041 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5042 5043 5044 if (pcbddc->dbg_flag) { 5045 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5046 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5047 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5048 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5049 for (i=0;i<ncoarse_cand;i++) { 5050 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5051 } 5052 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5053 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5054 } 5055 /* shift the pattern on coarse candidates */ 5056 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5057 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5058 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5059 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5060 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5061 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5062 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5063 } 5064 if (pcbddc->dbg_flag) { 5065 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5066 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5067 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5068 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5069 } 5070 } 5071 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5072 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5073 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5074 } else { /* this is the last level, so use just receiving processes in subcomm */ 5075 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5076 } 5077 } else { 5078 if (pcbddc->dbg_flag) { 5079 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5080 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5081 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5082 } 5083 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5084 coarse_mat_is = t_coarse_mat_is; 5085 } 5086 5087 /* create local to global scatters for coarse problem */ 5088 if (compute_vecs) { 5089 PetscInt lrows; 5090 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5091 if (coarse_mat_is) { 5092 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5093 } else { 5094 lrows = 0; 5095 } 5096 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5097 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5098 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5099 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5100 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5101 } 5102 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5103 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5104 5105 /* set defaults for coarse KSP and PC */ 5106 if (multilevel_allowed) { 5107 coarse_ksp_type = KSPRICHARDSON; 5108 coarse_pc_type = PCBDDC; 5109 } else { 5110 coarse_ksp_type = KSPPREONLY; 5111 coarse_pc_type = PCREDUNDANT; 5112 } 5113 5114 /* print some info if requested */ 5115 if (pcbddc->dbg_flag) { 5116 if (!multilevel_allowed) { 5117 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5118 if (multilevel_requested) { 5119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5120 } else if (pcbddc->max_levels) { 5121 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5122 } 5123 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5124 } 5125 } 5126 5127 /* create the coarse KSP object only once with defaults */ 5128 if (coarse_mat_is) { 5129 MatReuse coarse_mat_reuse; 5130 PetscViewer dbg_viewer = NULL; 5131 if (pcbddc->dbg_flag) { 5132 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5133 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5134 } 5135 if (!pcbddc->coarse_ksp) { 5136 char prefix[256],str_level[16]; 5137 size_t len; 5138 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5139 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5140 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5141 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5142 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5143 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5144 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5145 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5146 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5147 /* prefix */ 5148 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5149 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5150 if (!pcbddc->current_level) { 5151 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5152 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5153 } else { 5154 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5155 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5156 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5157 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5158 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5159 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5160 } 5161 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5162 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5163 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5164 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5165 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5166 /* allow user customization */ 5167 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5168 } 5169 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5170 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5171 if (nisdofs) { 5172 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5173 for (i=0;i<nisdofs;i++) { 5174 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5175 } 5176 } 5177 if (nisneu) { 5178 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5179 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5180 } 5181 if (nisvert) { 5182 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5183 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5184 } 5185 5186 /* get some info after set from options */ 5187 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5188 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5189 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5190 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5191 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5192 isbddc = PETSC_FALSE; 5193 } 5194 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5195 if (isredundant) { 5196 KSP inner_ksp; 5197 PC inner_pc; 5198 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5199 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5200 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5201 } 5202 5203 /* assemble coarse matrix */ 5204 if (coarse_reuse) { 5205 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5206 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5207 coarse_mat_reuse = MAT_REUSE_MATRIX; 5208 } else { 5209 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5210 } 5211 if (isbddc || isnn) { 5212 if (pcbddc->coarsening_ratio > 1) { 5213 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5214 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5215 if (pcbddc->dbg_flag) { 5216 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5217 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5218 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5219 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5220 } 5221 } 5222 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5223 } else { 5224 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5225 coarse_mat = coarse_mat_is; 5226 } 5227 } else { 5228 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5229 } 5230 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5231 5232 /* propagate symmetry info of coarse matrix */ 5233 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5234 if (pc->pmat->symmetric_set) { 5235 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5236 } 5237 if (pc->pmat->hermitian_set) { 5238 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5239 } 5240 if (pc->pmat->spd_set) { 5241 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5242 } 5243 /* set operators */ 5244 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5245 if (pcbddc->dbg_flag) { 5246 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5247 } 5248 } else { /* processes non partecipating to coarse solver (if any) */ 5249 coarse_mat = 0; 5250 } 5251 ierr = PetscFree(isarray);CHKERRQ(ierr); 5252 #if 0 5253 { 5254 PetscViewer viewer; 5255 char filename[256]; 5256 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5257 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5258 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5259 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5260 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5261 } 5262 #endif 5263 5264 /* Compute coarse null space (special handling by BDDC only) */ 5265 #if 0 5266 if (pcbddc->NullSpace) { 5267 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5268 } 5269 #endif 5270 /* hack */ 5271 if (pcbddc->coarse_ksp) { 5272 Vec crhs,csol; 5273 5274 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5275 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5276 if (!csol) { 5277 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5278 } 5279 if (!crhs) { 5280 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5281 } 5282 } 5283 5284 /* compute null space for coarse solver if the benign trick has been requested */ 5285 if (pcbddc->benign_null) { 5286 5287 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5288 for (i=0;i<pcbddc->benign_n;i++) { 5289 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5290 } 5291 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5292 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5293 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5294 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5295 if (coarse_mat) { 5296 Vec nullv; 5297 PetscScalar *array,*array2; 5298 PetscInt nl; 5299 5300 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5301 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5302 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5303 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5304 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5305 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5306 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5307 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5308 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5309 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5310 } 5311 } 5312 5313 if (pcbddc->coarse_ksp) { 5314 PetscBool ispreonly; 5315 5316 if (CoarseNullSpace) { 5317 PetscBool isnull; 5318 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5319 if (isnull) { 5320 if (isbddc && !pcbddc->benign_saddle_point) { 5321 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5322 } else { 5323 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5324 } 5325 } else { 5326 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5327 } 5328 } 5329 /* setup coarse ksp */ 5330 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5331 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5332 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5333 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5334 KSP check_ksp; 5335 KSPType check_ksp_type; 5336 PC check_pc; 5337 Vec check_vec,coarse_vec; 5338 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5339 PetscInt its; 5340 PetscBool compute_eigs; 5341 PetscReal *eigs_r,*eigs_c; 5342 PetscInt neigs; 5343 const char *prefix; 5344 5345 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5346 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5347 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5348 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5349 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5350 if (ispreonly) { 5351 check_ksp_type = KSPPREONLY; 5352 compute_eigs = PETSC_FALSE; 5353 } else { 5354 check_ksp_type = KSPGMRES; 5355 compute_eigs = PETSC_TRUE; 5356 } 5357 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5358 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5359 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5360 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5361 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5362 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5363 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5364 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5365 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5366 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5367 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5368 /* create random vec */ 5369 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5370 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5371 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5372 if (CoarseNullSpace) { 5373 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5374 } 5375 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5376 /* solve coarse problem */ 5377 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5378 if (CoarseNullSpace) { 5379 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5380 } 5381 /* set eigenvalue estimation if preonly has not been requested */ 5382 if (compute_eigs) { 5383 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5384 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5385 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5386 lambda_max = eigs_r[neigs-1]; 5387 lambda_min = eigs_r[0]; 5388 if (pcbddc->use_coarse_estimates) { 5389 if (lambda_max>lambda_min) { 5390 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5391 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5392 } 5393 } 5394 } 5395 5396 /* check coarse problem residual error */ 5397 if (pcbddc->dbg_flag) { 5398 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5399 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5400 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5401 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5402 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5403 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5404 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5405 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5406 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5407 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5408 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5409 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5410 if (CoarseNullSpace) { 5411 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5412 } 5413 if (compute_eigs) { 5414 PetscReal lambda_max_s,lambda_min_s; 5415 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5416 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5417 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5418 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5419 for (i=0;i<neigs;i++) { 5420 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5421 } 5422 } 5423 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5424 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5425 } 5426 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5427 if (compute_eigs) { 5428 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5429 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5430 } 5431 } 5432 } 5433 /* print additional info */ 5434 if (pcbddc->dbg_flag) { 5435 /* waits until all processes reaches this point */ 5436 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5437 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5438 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5439 } 5440 5441 /* free memory */ 5442 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5443 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5444 PetscFunctionReturn(0); 5445 } 5446 5447 #undef __FUNCT__ 5448 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5449 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5450 { 5451 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5452 PC_IS* pcis = (PC_IS*)pc->data; 5453 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5454 IS subset,subset_mult,subset_n; 5455 PetscInt local_size,coarse_size=0; 5456 PetscInt *local_primal_indices=NULL; 5457 const PetscInt *t_local_primal_indices; 5458 PetscErrorCode ierr; 5459 5460 PetscFunctionBegin; 5461 /* Compute global number of coarse dofs */ 5462 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5463 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5464 } 5465 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5466 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5467 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5468 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5469 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5470 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5471 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5472 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5473 if (local_size != pcbddc->local_primal_size) { 5474 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5475 } 5476 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5477 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5478 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5479 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5480 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5481 5482 /* check numbering */ 5483 if (pcbddc->dbg_flag) { 5484 PetscScalar coarsesum,*array,*array2; 5485 PetscInt i; 5486 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5487 5488 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5489 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5490 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5491 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 5492 /* counter */ 5493 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5494 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5495 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5496 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5497 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5498 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5499 5500 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5501 for (i=0;i<pcbddc->local_primal_size;i++) { 5502 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5503 } 5504 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5505 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5506 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5507 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5508 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5509 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5510 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5511 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5512 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5513 for (i=0;i<pcis->n;i++) { 5514 if (array[i] != 0.0 && array[i] != array2[i]) { 5515 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5516 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5517 set_error = PETSC_TRUE; 5518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5519 } 5520 } 5521 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5522 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5523 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5524 for (i=0;i<pcis->n;i++) { 5525 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5526 } 5527 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5528 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5529 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5530 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5531 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5532 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5533 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5534 PetscInt *gidxs; 5535 5536 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5537 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5538 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5539 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5541 for (i=0;i<pcbddc->local_primal_size;i++) { 5542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5543 } 5544 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5545 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5546 } 5547 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5548 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5549 } 5550 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5551 /* get back data */ 5552 *coarse_size_n = coarse_size; 5553 *local_primal_indices_n = local_primal_indices; 5554 PetscFunctionReturn(0); 5555 } 5556 5557 #undef __FUNCT__ 5558 #define __FUNCT__ "PCBDDCGlobalToLocal" 5559 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5560 { 5561 IS localis_t; 5562 PetscInt i,lsize,*idxs,n; 5563 PetscScalar *vals; 5564 PetscErrorCode ierr; 5565 5566 PetscFunctionBegin; 5567 /* get indices in local ordering exploiting local to global map */ 5568 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5569 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5570 for (i=0;i<lsize;i++) vals[i] = 1.0; 5571 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5572 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5573 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5574 if (idxs) { /* multilevel guard */ 5575 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5576 } 5577 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5578 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5579 ierr = PetscFree(vals);CHKERRQ(ierr); 5580 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5581 /* now compute set in local ordering */ 5582 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5583 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5584 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5585 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5586 for (i=0,lsize=0;i<n;i++) { 5587 if (PetscRealPart(vals[i]) > 0.5) { 5588 lsize++; 5589 } 5590 } 5591 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5592 for (i=0,lsize=0;i<n;i++) { 5593 if (PetscRealPart(vals[i]) > 0.5) { 5594 idxs[lsize++] = i; 5595 } 5596 } 5597 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5598 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5599 *localis = localis_t; 5600 PetscFunctionReturn(0); 5601 } 5602 5603 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5604 #undef __FUNCT__ 5605 #define __FUNCT__ "PCBDDCMatMult_Private" 5606 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5607 { 5608 PCBDDCChange_ctx change_ctx; 5609 PetscErrorCode ierr; 5610 5611 PetscFunctionBegin; 5612 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5613 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5614 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5615 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5616 PetscFunctionReturn(0); 5617 } 5618 5619 #undef __FUNCT__ 5620 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5621 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5622 { 5623 PCBDDCChange_ctx change_ctx; 5624 PetscErrorCode ierr; 5625 5626 PetscFunctionBegin; 5627 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5628 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5629 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5630 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5631 PetscFunctionReturn(0); 5632 } 5633 5634 #undef __FUNCT__ 5635 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5636 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5637 { 5638 PC_IS *pcis=(PC_IS*)pc->data; 5639 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5640 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5641 Mat S_j; 5642 PetscInt *used_xadj,*used_adjncy; 5643 PetscBool free_used_adj; 5644 PetscErrorCode ierr; 5645 5646 PetscFunctionBegin; 5647 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5648 free_used_adj = PETSC_FALSE; 5649 if (pcbddc->sub_schurs_layers == -1) { 5650 used_xadj = NULL; 5651 used_adjncy = NULL; 5652 } else { 5653 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5654 used_xadj = pcbddc->mat_graph->xadj; 5655 used_adjncy = pcbddc->mat_graph->adjncy; 5656 } else if (pcbddc->computed_rowadj) { 5657 used_xadj = pcbddc->mat_graph->xadj; 5658 used_adjncy = pcbddc->mat_graph->adjncy; 5659 } else { 5660 PetscBool flg_row=PETSC_FALSE; 5661 const PetscInt *xadj,*adjncy; 5662 PetscInt nvtxs; 5663 5664 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5665 if (flg_row) { 5666 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5667 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5668 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5669 free_used_adj = PETSC_TRUE; 5670 } else { 5671 pcbddc->sub_schurs_layers = -1; 5672 used_xadj = NULL; 5673 used_adjncy = NULL; 5674 } 5675 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5676 } 5677 } 5678 5679 /* setup sub_schurs data */ 5680 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5681 if (!sub_schurs->use_mumps) { 5682 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5683 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5684 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr); 5685 } else { 5686 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5687 PetscBool isseqaij; 5688 if (!pcbddc->use_vertices && reuse_solvers) { 5689 PetscInt n_vertices; 5690 5691 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5692 reuse_solvers = (PetscBool)!n_vertices; 5693 } 5694 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5695 if (!isseqaij) { 5696 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5697 if (matis->A == pcbddc->local_mat) { 5698 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5699 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5700 } else { 5701 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5702 } 5703 } 5704 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr); 5705 } 5706 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5707 5708 /* free adjacency */ 5709 if (free_used_adj) { 5710 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5711 } 5712 PetscFunctionReturn(0); 5713 } 5714 5715 #undef __FUNCT__ 5716 #define __FUNCT__ "PCBDDCInitSubSchurs" 5717 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5718 { 5719 PC_IS *pcis=(PC_IS*)pc->data; 5720 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5721 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5722 PCBDDCGraph graph; 5723 PetscErrorCode ierr; 5724 5725 PetscFunctionBegin; 5726 /* attach interface graph for determining subsets */ 5727 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5728 IS verticesIS,verticescomm; 5729 PetscInt vsize,*idxs; 5730 5731 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5732 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5733 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5734 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5735 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5736 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5737 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5738 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5739 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5740 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5741 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5742 /* 5743 if (pcbddc->dbg_flag) { 5744 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5745 } 5746 */ 5747 } else { 5748 graph = pcbddc->mat_graph; 5749 } 5750 5751 /* sub_schurs init */ 5752 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5753 5754 /* free graph struct */ 5755 if (pcbddc->sub_schurs_rebuild) { 5756 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5757 } 5758 PetscFunctionReturn(0); 5759 } 5760 5761 #undef __FUNCT__ 5762 #define __FUNCT__ "PCBDDCCheckOperator" 5763 PetscErrorCode PCBDDCCheckOperator(PC pc) 5764 { 5765 PC_IS *pcis=(PC_IS*)pc->data; 5766 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5767 PetscErrorCode ierr; 5768 5769 PetscFunctionBegin; 5770 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5771 IS zerodiag = NULL; 5772 Mat S_j,B0_B=NULL; 5773 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5774 PetscScalar *p0_check,*array,*array2; 5775 PetscReal norm; 5776 PetscInt i; 5777 5778 /* B0 and B0_B */ 5779 if (zerodiag) { 5780 IS dummy; 5781 5782 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5783 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5784 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5785 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5786 } 5787 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5788 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5789 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5790 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5791 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5792 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5793 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5794 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5795 /* S_j */ 5796 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5797 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5798 5799 /* mimic vector in \widetilde{W}_\Gamma */ 5800 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5801 /* continuous in primal space */ 5802 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5803 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5804 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5805 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5806 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5807 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5808 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5809 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5810 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5811 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5812 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5813 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5814 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5815 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5816 5817 /* assemble rhs for coarse problem */ 5818 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5819 /* local with Schur */ 5820 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5821 if (zerodiag) { 5822 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5823 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5824 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5825 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5826 } 5827 /* sum on primal nodes the local contributions */ 5828 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5829 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5830 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5831 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5832 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5833 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5834 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5835 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5836 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5837 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5838 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5839 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5840 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5841 /* scale primal nodes (BDDC sums contibutions) */ 5842 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5843 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5844 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5845 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5846 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5847 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5848 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5849 /* global: \widetilde{B0}_B w_\Gamma */ 5850 if (zerodiag) { 5851 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5852 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5853 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5854 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5855 } 5856 /* BDDC */ 5857 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5858 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5859 5860 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5861 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5862 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5863 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5864 for (i=0;i<pcbddc->benign_n;i++) { 5865 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5866 } 5867 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5868 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5869 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5870 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5871 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5872 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5873 } 5874 PetscFunctionReturn(0); 5875 } 5876