1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y); 7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y); 8 9 /* TODO: add reuse flag */ 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSeqAIJCompress" 12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 13 { 14 Mat Bt; 15 PetscScalar *a,*bdata; 16 const PetscInt *ii,*ij; 17 PetscInt m,n,i,nnz,*bii,*bij; 18 PetscBool flg_row; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 23 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 24 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 25 nnz = n; 26 for (i=0;i<ii[n];i++) { 27 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 28 } 29 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 30 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 31 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 32 nnz = 0; 33 bii[0] = 0; 34 for (i=0;i<n;i++) { 35 PetscInt j; 36 for (j=ii[i];j<ii[i+1];j++) { 37 PetscScalar entry = a[j]; 38 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 39 bij[nnz] = ij[j]; 40 bdata[nnz] = entry; 41 nnz++; 42 } 43 } 44 bii[i+1] = nnz; 45 } 46 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 47 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 48 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 49 { 50 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 51 b->free_a = PETSC_TRUE; 52 b->free_ij = PETSC_TRUE; 53 } 54 *B = Bt; 55 PetscFunctionReturn(0); 56 } 57 58 #undef __FUNCT__ 59 #define __FUNCT__ "MatDetectDisconnectedComponents" 60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[]) 61 { 62 Mat B; 63 IS is_dummy,*cc_n; 64 ISLocalToGlobalMapping l2gmap_dummy; 65 PCBDDCGraph graph; 66 PetscInt i,n; 67 PetscInt *xadj,*adjncy; 68 PetscInt *xadj_filtered,*adjncy_filtered; 69 PetscBool flg_row,isseqaij; 70 PetscErrorCode ierr; 71 72 PetscFunctionBegin; 73 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 74 if (!isseqaij && filter) { 75 PetscBool isseqdense; 76 77 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 78 if (!isseqdense) { 79 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 80 } else { /* TODO: rectangular case and LDA */ 81 PetscScalar *array; 82 PetscReal chop=1.e-6; 83 84 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 85 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 86 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 87 for (i=0;i<n;i++) { 88 PetscInt j; 89 for (j=i+1;j<n;j++) { 90 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 91 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 92 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 93 } 94 } 95 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 96 ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr); 97 } 98 } else { 99 B = A; 100 } 101 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 102 103 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 104 if (filter) { 105 PetscScalar *data; 106 PetscInt j,cum; 107 108 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 109 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 110 cum = 0; 111 for (i=0;i<n;i++) { 112 PetscInt t; 113 114 for (j=xadj[i];j<xadj[i+1];j++) { 115 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 116 continue; 117 } 118 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 119 } 120 t = xadj_filtered[i]; 121 xadj_filtered[i] = cum; 122 cum += t; 123 } 124 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 125 } else { 126 xadj_filtered = NULL; 127 adjncy_filtered = NULL; 128 } 129 130 /* compute local connected components using PCBDDCGraph */ 131 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 132 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 133 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 134 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 135 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr); 136 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 137 if (xadj_filtered) { 138 graph->xadj = xadj_filtered; 139 graph->adjncy = adjncy_filtered; 140 } else { 141 graph->xadj = xadj; 142 graph->adjncy = adjncy; 143 } 144 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 145 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 146 /* partial clean up */ 147 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 148 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 149 if (A != B) { 150 ierr = MatDestroy(&B);CHKERRQ(ierr); 151 } 152 153 /* get back data */ 154 if (ncc) *ncc = graph->ncc; 155 if (cc) { 156 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 157 for (i=0;i<graph->ncc;i++) { 158 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 159 } 160 *cc = cc_n; 161 } 162 /* clean up graph */ 163 graph->xadj = 0; 164 graph->adjncy = 0; 165 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "PCBDDCBenignCheck" 171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 172 { 173 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 174 PC_IS* pcis = (PC_IS*)(pc->data); 175 IS dirIS = NULL; 176 PetscInt i; 177 PetscErrorCode ierr; 178 179 PetscFunctionBegin; 180 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 181 if (zerodiag) { 182 Mat A; 183 Vec vec3_N; 184 PetscScalar *vals; 185 const PetscInt *idxs; 186 PetscInt nz; 187 188 /* p0 */ 189 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 190 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 191 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 192 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 193 for (i=0;i<nz;i++) vals[i] = 1.; 194 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 195 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 196 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 197 /* v_I */ 198 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 199 for (i=0;i<nz;i++) vals[i] = 0.; 200 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 201 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 202 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 203 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 204 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 205 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 206 if (dirIS) { 207 PetscInt n; 208 209 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 210 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 211 for (i=0;i<n;i++) vals[i] = 0.; 212 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 213 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 214 } 215 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 216 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 217 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 218 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 219 ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr); 220 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 221 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 222 if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { 223 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0])); 224 } 225 ierr = PetscFree(vals);CHKERRQ(ierr); 226 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 227 } 228 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 229 230 /* check PCBDDCBenignGetOrSetP0 */ 231 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 232 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 233 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 234 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 235 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 236 for (i=0;i<pcbddc->benign_n;i++) { 237 if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) { 238 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr); 239 } 240 } 241 PetscFunctionReturn(0); 242 } 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint" 246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 247 { 248 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 249 IS pressures,zerodiag,*zerodiag_subs; 250 PetscInt nz,n; 251 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag; 252 PetscErrorCode ierr; 253 254 PetscFunctionBegin; 255 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 256 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 257 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 258 /* if a local info on dofs is present, assumes the last field is represented by "pressures" 259 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 260 Checks if all the pressure dofs in each subdomain have a zero diagonal 261 If not, a change of basis on pressures is not needed 262 since the local Schur complements are SPD 263 */ 264 has_null_pressures = PETSC_TRUE; 265 have_null = PETSC_TRUE; 266 if (pcbddc->n_ISForDofsLocal) { 267 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 268 269 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 270 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 271 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 272 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 273 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 274 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 275 if (!sorted) { 276 ierr = ISSort(pressures);CHKERRQ(ierr); 277 } 278 } else { 279 pressures = NULL; 280 } 281 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 282 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 283 /* TODO: add check for shared dofs and raise error */ 284 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 285 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 286 if (!sorted) { 287 ierr = ISSort(zerodiag);CHKERRQ(ierr); 288 } 289 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 290 if (!nz) { 291 if (n) have_null = PETSC_FALSE; 292 has_null_pressures = PETSC_FALSE; 293 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 294 } 295 recompute_zerodiag = PETSC_FALSE; 296 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 297 zerodiag_subs = NULL; 298 pcbddc->benign_n = 0; 299 if (has_null_pressures) { 300 IS *subs; 301 PetscInt nsubs,i; 302 303 subs = pcbddc->local_subs; 304 nsubs = pcbddc->n_local_subs; 305 if (nsubs > 1) { 306 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 307 for (i=0;i<nsubs;i++) { 308 ISLocalToGlobalMapping l2g; 309 IS t_zerodiag_subs; 310 PetscInt nl; 311 312 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 313 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 314 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 315 if (nl) { 316 PetscBool valid = PETSC_TRUE; 317 318 if (pressures) { 319 IS t_pressure_subs; 320 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 321 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 322 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 323 } 324 if (valid) { 325 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 326 pcbddc->benign_n++; 327 } else { 328 recompute_zerodiag = PETSC_TRUE; 329 } 330 } 331 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 332 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 333 } 334 } else { /* there's just one subdomain (or zero if they have not been detected */ 335 PetscBool valid = PETSC_TRUE; 336 if (pressures) { 337 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 338 } 339 if (valid) { 340 pcbddc->benign_n = 1; 341 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 342 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 343 zerodiag_subs[0] = zerodiag; 344 } 345 } 346 } 347 348 if (!pcbddc->benign_n) { 349 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 350 recompute_zerodiag = PETSC_FALSE; 351 has_null_pressures = PETSC_FALSE; 352 have_null = PETSC_FALSE; 353 } 354 355 /* final check for null pressures */ 356 if (zerodiag && pressures) { 357 PetscInt nz,np; 358 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 359 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 360 if (nz != np) have_null = PETSC_FALSE; 361 } 362 363 if (recompute_zerodiag) { 364 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 365 if (pcbddc->benign_n == 1) { 366 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 367 zerodiag = zerodiag_subs[0]; 368 } else { 369 PetscInt i,nzn,*new_idxs; 370 371 nzn = 0; 372 for (i=0;i<pcbddc->benign_n;i++) { 373 PetscInt ns; 374 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 375 nzn += ns; 376 } 377 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 378 nzn = 0; 379 for (i=0;i<pcbddc->benign_n;i++) { 380 PetscInt ns,*idxs; 381 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 382 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 383 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 384 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 385 nzn += ns; 386 } 387 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 388 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 389 } 390 have_null = PETSC_FALSE; 391 } 392 393 if (has_null_pressures) { 394 IS zerodiagc; 395 const PetscInt *idxs,*idxsc; 396 PetscInt i,s,*nnz; 397 398 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 399 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 400 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 401 /* local change of basis for pressures */ 402 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 403 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 404 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 405 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 406 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 407 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 408 for (i=0;i<pcbddc->benign_n;i++) { 409 PetscInt nzs,j; 410 411 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 412 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 413 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 414 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 415 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 416 } 417 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 418 ierr = PetscFree(nnz);CHKERRQ(ierr); 419 /* set identity on velocities */ 420 for (i=0;i<n-nz;i++) { 421 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 422 } 423 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 424 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 425 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 426 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 427 /* set change on pressures */ 428 for (s=0;s<pcbddc->benign_n;s++) { 429 PetscScalar *array; 430 PetscInt nzs; 431 432 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 433 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 434 for (i=0;i<nzs-1;i++) { 435 PetscScalar vals[2]; 436 PetscInt cols[2]; 437 438 cols[0] = idxs[i]; 439 cols[1] = idxs[nzs-1]; 440 vals[0] = 1.; 441 vals[1] = 1.; 442 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 443 } 444 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 445 for (i=0;i<nzs-1;i++) array[i] = -1.; 446 array[nzs-1] = 1.; 447 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 448 /* store local idxs for p0 */ 449 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 450 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 451 ierr = PetscFree(array);CHKERRQ(ierr); 452 } 453 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 454 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 455 /* TODO: need optimization? */ 456 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr); 457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 458 ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr); 459 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 460 /* store global idxs for p0 */ 461 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 462 } 463 pcbddc->benign_zerodiag_subs = zerodiag_subs; 464 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 465 466 /* determines if the coarse solver will be singular or not */ 467 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 468 *zerodiaglocal = zerodiag; 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNCT__ 473 #define __FUNCT__ "PCBDDCBenignGetOrSetP0" 474 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 475 { 476 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 477 PetscErrorCode ierr; 478 479 PetscFunctionBegin; 480 if (!pcbddc->benign_sf) { 481 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 482 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 483 } 484 if (get) { /* use SF to get values */ 485 PetscScalar *array; 486 487 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 488 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 489 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 490 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 491 } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */ 492 ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr); 493 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 494 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 495 } 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "PCBDDCBenignPopOrPushB0" 501 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 502 { 503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 504 PetscErrorCode ierr; 505 506 PetscFunctionBegin; 507 /* TODO: add error checking 508 - avoid nested pop (or push) calls. 509 - cannot push before pop. 510 - cannot call this if pcbddc->local_mat is NULL 511 */ 512 if (!pcbddc->benign_n) { 513 PetscFunctionReturn(0); 514 } 515 if (pop) { 516 IS is_p0; 517 MatReuse reuse; 518 519 /* extract B_0 */ 520 reuse = MAT_INITIAL_MATRIX; 521 if (pcbddc->benign_B0) { 522 reuse = MAT_REUSE_MATRIX; 523 } 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 525 ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 526 /* remove rows and cols from local problem */ 527 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 528 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 529 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 530 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 531 } else { /* push */ 532 PetscInt i; 533 534 for (i=0;i<pcbddc->benign_n;i++) { 535 PetscScalar *B0_vals; 536 PetscInt *B0_cols,B0_ncol; 537 538 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 539 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 540 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 541 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 542 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 543 } 544 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 545 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 546 } 547 PetscFunctionReturn(0); 548 } 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "PCBDDCAdaptiveSelection" 552 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 553 { 554 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 555 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 556 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 557 PetscBLASInt *B_iwork,*B_ifail; 558 PetscScalar *work,lwork; 559 PetscScalar *St,*S,*eigv; 560 PetscScalar *Sarray,*Starray; 561 PetscReal *eigs,thresh; 562 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 563 PetscBool allocated_S_St; 564 #if defined(PETSC_USE_COMPLEX) 565 PetscReal *rwork; 566 #endif 567 PetscErrorCode ierr; 568 569 PetscFunctionBegin; 570 if (!sub_schurs->use_mumps) { 571 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS"); 572 } 573 574 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) { 575 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef); 576 } 577 578 if (pcbddc->dbg_flag) { 579 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 580 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 582 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 583 } 584 585 if (pcbddc->dbg_flag) { 586 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 587 } 588 589 /* max size of subsets */ 590 mss = 0; 591 for (i=0;i<sub_schurs->n_subs;i++) { 592 PetscInt subset_size; 593 594 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 595 mss = PetscMax(mss,subset_size); 596 } 597 598 /* min/max and threshold */ 599 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 600 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 601 nmax = PetscMax(nmin,nmax); 602 allocated_S_St = PETSC_FALSE; 603 if (nmin) { 604 allocated_S_St = PETSC_TRUE; 605 } 606 607 /* allocate lapack workspace */ 608 cum = cum2 = 0; 609 maxneigs = 0; 610 for (i=0;i<sub_schurs->n_subs;i++) { 611 PetscInt n,subset_size; 612 613 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 614 n = PetscMin(subset_size,nmax); 615 cum += subset_size; 616 cum2 += subset_size*n; 617 maxneigs = PetscMax(maxneigs,n); 618 } 619 if (mss) { 620 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 621 PetscBLASInt B_itype = 1; 622 PetscBLASInt B_N = mss; 623 PetscReal zero = 0.0; 624 PetscReal eps = 0.0; /* dlamch? */ 625 626 B_lwork = -1; 627 S = NULL; 628 St = NULL; 629 eigs = NULL; 630 eigv = NULL; 631 B_iwork = NULL; 632 B_ifail = NULL; 633 #if defined(PETSC_USE_COMPLEX) 634 rwork = NULL; 635 #endif 636 thresh = 1.0; 637 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 638 #if defined(PETSC_USE_COMPLEX) 639 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 640 #else 641 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 642 #endif 643 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 644 ierr = PetscFPTrapPop();CHKERRQ(ierr); 645 } else { 646 /* TODO */ 647 } 648 } else { 649 lwork = 0; 650 } 651 652 nv = 0; 653 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 654 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 655 } 656 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 657 if (allocated_S_St) { 658 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 659 } 660 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 661 #if defined(PETSC_USE_COMPLEX) 662 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 663 #endif 664 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 665 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 666 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 667 nv+cum,&pcbddc->adaptive_constraints_idxs, 668 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 669 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 670 671 maxneigs = 0; 672 cum = cum2 = cumarray = 0; 673 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 674 pcbddc->adaptive_constraints_data_ptr[0] = 0; 675 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 676 const PetscInt *idxs; 677 678 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 679 for (cum=0;cum<nv;cum++) { 680 pcbddc->adaptive_constraints_n[cum] = 1; 681 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 682 pcbddc->adaptive_constraints_data[cum] = 1.0; 683 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 684 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 685 } 686 cum2 = cum; 687 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 688 } 689 690 if (mss) { /* multilevel */ 691 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 692 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 693 } 694 695 thresh = pcbddc->adaptive_threshold; 696 for (i=0;i<sub_schurs->n_subs;i++) { 697 const PetscInt *idxs; 698 PetscReal infty = PETSC_MAX_REAL; 699 PetscInt j,subset_size,eigs_start = 0; 700 PetscBLASInt B_N; 701 PetscBool same_data = PETSC_FALSE; 702 703 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 704 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 705 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 706 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 707 if (sub_schurs->is_hermitian) { 708 PetscInt j,k; 709 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 710 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 711 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 712 } 713 for (j=0;j<subset_size;j++) { 714 for (k=j;k<subset_size;k++) { 715 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 716 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 717 } 718 } 719 } else { 720 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 721 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 722 } 723 } else { 724 S = Sarray + cumarray; 725 St = Starray + cumarray; 726 } 727 728 /* see if we can save some work */ 729 if (sub_schurs->n_subs == 1) { 730 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 731 } 732 733 if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */ 734 B_neigs = 0; 735 } else { 736 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 737 PetscBLASInt B_itype = 1; 738 PetscBLASInt B_IL, B_IU; 739 PetscReal eps = -1.0; /* dlamch? */ 740 PetscInt nmin_s; 741 742 if (pcbddc->dbg_flag) { 743 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]); 744 } 745 746 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 747 if (thresh > 1.+PETSC_SMALL) { 748 749 /* ask for eigenvalues larger than thresh */ 750 #if defined(PETSC_USE_COMPLEX) 751 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 752 #else 753 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 754 #endif 755 } else { 756 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 757 B_IL = 1; 758 #if defined(PETSC_USE_COMPLEX) 759 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 760 #else 761 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 762 #endif 763 } 764 ierr = PetscFPTrapPop();CHKERRQ(ierr); 765 if (B_ierr) { 766 if (B_ierr < 0 ) { 767 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 768 } else if (B_ierr <= B_N) { 769 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 770 } else { 771 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 772 } 773 } 774 775 if (B_neigs > nmax) { 776 if (pcbddc->dbg_flag) { 777 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 778 } 779 eigs_start = B_neigs -nmax; 780 B_neigs = nmax; 781 } 782 783 nmin_s = PetscMin(nmin,B_N); 784 if (B_neigs < nmin_s) { 785 PetscBLASInt B_neigs2; 786 787 B_IU = B_N - B_neigs; 788 B_IL = B_N - nmin_s + 1; 789 if (pcbddc->dbg_flag) { 790 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 791 } 792 if (sub_schurs->is_hermitian) { 793 PetscInt j; 794 for (j=0;j<subset_size;j++) { 795 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 796 } 797 for (j=0;j<subset_size;j++) { 798 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr); 799 } 800 } else { 801 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 802 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 803 } 804 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 805 #if defined(PETSC_USE_COMPLEX) 806 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 807 #else 808 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 809 #endif 810 ierr = PetscFPTrapPop();CHKERRQ(ierr); 811 B_neigs += B_neigs2; 812 } 813 if (B_ierr) { 814 if (B_ierr < 0 ) { 815 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 816 } else if (B_ierr <= B_N) { 817 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 818 } else { 819 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 820 } 821 } 822 if (pcbddc->dbg_flag) { 823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 824 for (j=0;j<B_neigs;j++) { 825 if (eigs[j] == 0.0) { 826 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 827 } else { 828 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 829 } 830 } 831 } 832 } else { 833 /* TODO */ 834 } 835 } 836 maxneigs = PetscMax(B_neigs,maxneigs); 837 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 838 if (B_neigs) { 839 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 840 841 if (pcbddc->dbg_flag > 1) { 842 PetscInt ii; 843 for (ii=0;ii<B_neigs;ii++) { 844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 845 for (j=0;j<B_N;j++) { 846 #if defined(PETSC_USE_COMPLEX) 847 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 848 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 850 #else 851 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 852 #endif 853 } 854 } 855 } 856 #if 0 857 for (j=0;j<B_neigs;j++) { 858 PetscBLASInt Blas_N,Blas_one = 1.0; 859 PetscScalar norm; 860 ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr); 861 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size, 862 &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 863 if (pcbddc->adaptive_constraints_data[cum2] > 0.0) { 864 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 865 } else { 866 norm = -1.0/PetscSqrtReal(PetscRealPart(norm)); 867 } 868 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one)); 869 } 870 #endif 871 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 872 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 873 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 874 cum++; 875 } 876 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 877 /* shift for next computation */ 878 cumarray += subset_size*subset_size; 879 } 880 if (pcbddc->dbg_flag) { 881 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 882 } 883 884 if (mss) { 885 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 886 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 887 /* destroy matrices (junk) */ 888 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 889 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 890 } 891 if (allocated_S_St) { 892 ierr = PetscFree2(S,St);CHKERRQ(ierr); 893 } 894 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 895 #if defined(PETSC_USE_COMPLEX) 896 ierr = PetscFree(rwork);CHKERRQ(ierr); 897 #endif 898 if (pcbddc->dbg_flag) { 899 PetscInt maxneigs_r; 900 ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 901 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 902 } 903 PetscFunctionReturn(0); 904 } 905 906 #undef __FUNCT__ 907 #define __FUNCT__ "PCBDDCSetUpSolvers" 908 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 909 { 910 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 911 PetscScalar *coarse_submat_vals; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 /* Setup local scatters R_to_B and (optionally) R_to_D */ 916 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 917 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 918 919 /* Setup local neumann solver ksp_R */ 920 /* PCBDDCSetUpLocalScatters should be called first! */ 921 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 922 923 /* Change global null space passed in by the user if change of basis has been requested */ 924 if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) { 925 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 926 } 927 928 /* 929 Setup local correction and local part of coarse basis. 930 Gives back the dense local part of the coarse matrix in column major ordering 931 */ 932 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 933 934 /* Compute total number of coarse nodes and setup coarse solver */ 935 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 936 937 /* free */ 938 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 942 #undef __FUNCT__ 943 #define __FUNCT__ "PCBDDCResetCustomization" 944 PetscErrorCode PCBDDCResetCustomization(PC pc) 945 { 946 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 947 PetscErrorCode ierr; 948 949 PetscFunctionBegin; 950 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 951 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 952 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 953 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 954 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 955 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 956 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 957 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 958 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 959 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 960 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 961 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "PCBDDCResetTopography" 967 PetscErrorCode PCBDDCResetTopography(PC pc) 968 { 969 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 970 PetscInt i; 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 975 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 976 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 977 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 978 for (i=0;i<pcbddc->n_local_subs;i++) { 979 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 980 } 981 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 982 ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNCT__ 987 #define __FUNCT__ "PCBDDCResetSolvers" 988 PetscErrorCode PCBDDCResetSolvers(PC pc) 989 { 990 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 991 PetscErrorCode ierr; 992 993 PetscFunctionBegin; 994 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 995 if (pcbddc->coarse_phi_B) { 996 PetscScalar *array; 997 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 998 ierr = PetscFree(array);CHKERRQ(ierr); 999 } 1000 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1001 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1002 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1003 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1004 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1005 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1006 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1007 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1008 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1009 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1010 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 1011 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 1012 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 1013 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 1014 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 1015 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 1016 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 1017 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1018 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 1019 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 1020 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 1021 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 1022 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 1023 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 1024 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 1025 ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr); 1026 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 1027 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 1028 if (pcbddc->benign_zerodiag_subs) { 1029 PetscInt i; 1030 for (i=0;i<pcbddc->benign_n;i++) { 1031 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1032 } 1033 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 1034 } 1035 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 1041 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 1042 { 1043 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1044 PC_IS *pcis = (PC_IS*)pc->data; 1045 VecType impVecType; 1046 PetscInt n_constraints,n_R,old_size; 1047 PetscErrorCode ierr; 1048 1049 PetscFunctionBegin; 1050 if (!pcbddc->ConstraintMatrix) { 1051 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 1052 } 1053 /* get sizes */ 1054 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 1055 n_R = pcis->n - pcbddc->n_vertices; 1056 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 1057 /* local work vectors (try to avoid unneeded work)*/ 1058 /* R nodes */ 1059 old_size = -1; 1060 if (pcbddc->vec1_R) { 1061 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 1062 } 1063 if (n_R != old_size) { 1064 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 1065 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 1066 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 1067 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 1068 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 1069 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 1070 } 1071 /* local primal dofs */ 1072 old_size = -1; 1073 if (pcbddc->vec1_P) { 1074 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 1075 } 1076 if (pcbddc->local_primal_size != old_size) { 1077 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 1078 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 1079 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 1080 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 1081 } 1082 /* local explicit constraints */ 1083 old_size = -1; 1084 if (pcbddc->vec1_C) { 1085 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 1086 } 1087 if (n_constraints && n_constraints != old_size) { 1088 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 1089 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 1090 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 1091 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "PCBDDCSetUpCorrection" 1098 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 1099 { 1100 PetscErrorCode ierr; 1101 /* pointers to pcis and pcbddc */ 1102 PC_IS* pcis = (PC_IS*)pc->data; 1103 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1104 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 1105 /* submatrices of local problem */ 1106 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 1107 /* submatrices of benign trick */ 1108 Mat B0_V = NULL; 1109 /* submatrices of local coarse problem */ 1110 Mat S_VV,S_CV,S_VC,S_CC; 1111 /* working matrices */ 1112 Mat C_CR; 1113 /* additional working stuff */ 1114 PC pc_R; 1115 Mat F; 1116 PetscBool isLU,isCHOL,isILU; 1117 1118 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 1119 PetscScalar *work; 1120 PetscInt *idx_V_B; 1121 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 1122 PetscInt i,n_R,n_D,n_B; 1123 1124 /* some shortcuts to scalars */ 1125 PetscScalar one=1.0,m_one=-1.0; 1126 1127 PetscFunctionBegin; 1128 if (!pcbddc->symmetric_primal && pcbddc->benign_n) { 1129 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 1130 } 1131 1132 /* Set Non-overlapping dimensions */ 1133 n_vertices = pcbddc->n_vertices; 1134 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 1135 n_B = pcis->n_B; 1136 n_D = pcis->n - n_B; 1137 n_R = pcis->n - n_vertices; 1138 1139 /* vertices in boundary numbering */ 1140 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 1141 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 1142 if (i != n_vertices) { 1143 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 1144 } 1145 1146 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 1147 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 1148 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 1149 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 1150 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 1151 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 1152 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 1153 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 1154 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 1155 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 1156 1157 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 1158 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 1159 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 1160 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 1161 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 1162 lda_rhs = n_R; 1163 if (isLU || isILU || isCHOL) { 1164 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 1165 } else if (sub_schurs->reuse_mumps) { 1166 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 1167 MatFactorType type; 1168 1169 F = reuse_mumps->F; 1170 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 1171 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 1172 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 1173 } else { 1174 F = NULL; 1175 } 1176 1177 /* allocate workspace */ 1178 n = 0; 1179 if (n_constraints) { 1180 n += lda_rhs*n_constraints; 1181 } 1182 if (n_vertices) { 1183 n = PetscMax(2*lda_rhs*n_vertices,n); 1184 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 1185 } 1186 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 1187 1188 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 1189 if (n_constraints) { 1190 Mat M1,M2,M3; 1191 Mat auxmat; 1192 IS is_aux; 1193 PetscScalar *array,*array2; 1194 1195 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 1196 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 1197 1198 /* Extract constraints on R nodes: C_{CR} */ 1199 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 1200 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 1201 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr); 1202 1203 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 1204 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 1205 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1206 for (i=0;i<n_constraints;i++) { 1207 const PetscScalar *row_cmat_values; 1208 const PetscInt *row_cmat_indices; 1209 PetscInt size_of_constraint,j; 1210 1211 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1212 for (j=0;j<size_of_constraint;j++) { 1213 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 1214 } 1215 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 1216 } 1217 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 1218 if (F) { 1219 Mat B; 1220 1221 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1222 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 1223 ierr = MatDestroy(&B);CHKERRQ(ierr); 1224 } else { 1225 PetscScalar *marr; 1226 1227 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1228 for (i=0;i<n_constraints;i++) { 1229 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1230 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 1231 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1232 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1233 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1234 } 1235 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 1236 } 1237 if (!pcbddc->switch_static) { 1238 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1239 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1240 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1241 for (i=0;i<n_constraints;i++) { 1242 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 1243 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 1244 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1246 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1247 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1248 } 1249 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 1250 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 1251 ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1252 } else { 1253 if (lda_rhs != n_R) { 1254 IS dummy; 1255 1256 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 1257 ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 1258 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1259 } else { 1260 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 1261 pcbddc->local_auxmat2 = local_auxmat2_R; 1262 } 1263 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 1264 } 1265 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1266 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 1267 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 1268 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 1269 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 1270 if (isCHOL) { 1271 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 1272 } else { 1273 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 1274 } 1275 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 1278 ierr = MatDestroy(&M2);CHKERRQ(ierr); 1279 ierr = MatDestroy(&M3);CHKERRQ(ierr); 1280 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 1281 ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 1282 ierr = MatDestroy(&auxmat);CHKERRQ(ierr); 1283 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 1284 ierr = MatDestroy(&M1);CHKERRQ(ierr); 1285 } 1286 /* Get submatrices from subdomain matrix */ 1287 if (pcbddc->benign_n) { 1288 IS dummy; 1289 Mat B0_R; 1290 PetscReal norm; 1291 1292 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1293 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr); 1294 ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr); 1295 if (norm > PETSC_SMALL) { 1296 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm); 1297 } 1298 ierr = MatDestroy(&B0_R);CHKERRQ(ierr); 1299 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1300 } 1301 1302 if (n_vertices) { 1303 IS is_aux; 1304 1305 if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */ 1306 IS tis; 1307 1308 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 1309 ierr = ISSort(tis);CHKERRQ(ierr); 1310 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 1311 ierr = ISDestroy(&tis);CHKERRQ(ierr); 1312 } else { 1313 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 1314 } 1315 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 1316 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 1317 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 1318 if (pcbddc->benign_n) { 1319 IS dummy; 1320 1321 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 1322 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr); 1323 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 1324 } 1325 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 1326 } 1327 1328 /* Matrix of coarse basis functions (local) */ 1329 if (pcbddc->coarse_phi_B) { 1330 PetscInt on_B,on_primal,on_D=n_D; 1331 if (pcbddc->coarse_phi_D) { 1332 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 1333 } 1334 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 1335 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 1336 PetscScalar *marray; 1337 1338 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 1339 ierr = PetscFree(marray);CHKERRQ(ierr); 1340 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1341 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1342 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1343 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1344 } 1345 } 1346 1347 if (!pcbddc->coarse_phi_B) { 1348 PetscScalar *marray; 1349 1350 n = n_B*pcbddc->local_primal_size; 1351 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1352 n += n_D*pcbddc->local_primal_size; 1353 } 1354 if (!pcbddc->symmetric_primal) { 1355 n *= 2; 1356 } 1357 ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr); 1358 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 1359 n = n_B*pcbddc->local_primal_size; 1360 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1361 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 1362 n += n_D*pcbddc->local_primal_size; 1363 } 1364 if (!pcbddc->symmetric_primal) { 1365 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 1366 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1367 n = n_B*pcbddc->local_primal_size; 1368 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 1369 } 1370 } else { 1371 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 1372 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 1373 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1374 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 1375 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 1376 } 1377 } 1378 } 1379 1380 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 1381 p0_lidx_I = NULL; 1382 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 1383 const PetscInt *idxs; 1384 1385 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1386 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 1387 for (i=0;i<pcbddc->benign_n;i++) { 1388 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 1389 } 1390 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 1391 } 1392 1393 /* vertices */ 1394 if (n_vertices) { 1395 1396 ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr); 1397 1398 if (n_R) { 1399 Mat A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */ 1400 PetscBLASInt B_N,B_one = 1; 1401 PetscScalar *x,*y; 1402 PetscBool isseqaij; 1403 1404 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 1405 if (lda_rhs == n_R) { 1406 ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1407 } else { 1408 PetscScalar *av,*array; 1409 const PetscInt *xadj,*adjncy; 1410 PetscInt n; 1411 PetscBool flg_row; 1412 1413 array = work+lda_rhs*n_vertices; 1414 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1415 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr); 1416 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1417 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 1418 for (i=0;i<n;i++) { 1419 PetscInt j; 1420 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 1421 } 1422 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1423 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1424 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 1425 } 1426 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1427 if (F) { /* TODO could be optimized for symmetric problems */ 1428 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 1429 } else { 1430 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 1431 for (i=0;i<n_vertices;i++) { 1432 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 1433 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 1434 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1435 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1436 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1437 } 1438 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 1439 } 1440 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1441 /* S_VV and S_CV */ 1442 if (n_constraints) { 1443 Mat B; 1444 1445 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 1446 for (i=0;i<n_vertices;i++) { 1447 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 1448 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 1449 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1450 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1451 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1452 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1453 } 1454 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1455 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 1456 ierr = MatDestroy(&B);CHKERRQ(ierr); 1457 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 1458 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1459 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 1460 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 1461 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 1462 ierr = MatDestroy(&B);CHKERRQ(ierr); 1463 } 1464 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 1465 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 1466 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1467 } 1468 if (lda_rhs != n_R) { 1469 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1470 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 1471 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 1472 } 1473 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 1474 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 1475 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 1476 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 1477 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 1478 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 1479 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 1480 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 1481 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1482 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 1483 } else { 1484 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1485 } 1486 for (i=0;i<pcbddc->benign_n;i++) { 1487 const PetscScalar *vals; 1488 const PetscInt *idxs; 1489 PetscInt n,j,primal_idx; 1490 1491 ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1492 primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i; 1493 for (j=0;j<n;j++) { 1494 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j]; 1495 coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j]; 1496 } 1497 ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr); 1498 } 1499 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 1500 1501 /* coarse basis functions */ 1502 for (i=0;i<n_vertices;i++) { 1503 PetscScalar *y; 1504 1505 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1506 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1507 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1508 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1509 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1510 y[n_B*i+idx_V_B[i]] = 1.0; 1511 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1512 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1513 1514 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1515 PetscInt j; 1516 1517 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1518 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1519 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1521 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1522 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1523 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1524 } 1525 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1526 } 1527 /* if n_R == 0 the object is not destroyed */ 1528 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 1529 } 1530 1531 if (n_constraints) { 1532 Mat B; 1533 1534 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 1535 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1536 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 1537 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 1538 if (n_vertices) { 1539 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 1540 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 1541 } else { 1542 Mat S_VCt; 1543 1544 if (lda_rhs != n_R) { 1545 ierr = MatDestroy(&B);CHKERRQ(ierr); 1546 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr); 1547 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 1548 } 1549 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 1550 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 1551 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 1552 } 1553 } 1554 ierr = MatDestroy(&B);CHKERRQ(ierr); 1555 /* coarse basis functions */ 1556 for (i=0;i<n_constraints;i++) { 1557 PetscScalar *y; 1558 1559 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 1560 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1561 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 1562 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 1565 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1566 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1567 PetscInt j; 1568 1569 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1570 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 1571 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1574 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 1575 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 1576 } 1577 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1578 } 1579 } 1580 if (n_constraints) { 1581 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 1582 } 1583 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 1584 ierr = MatDestroy(&B0_V);CHKERRQ(ierr); 1585 1586 /* compute other basis functions for non-symmetric problems */ 1587 if (!pcbddc->symmetric_primal) { 1588 Mat B_V=NULL,B_C=NULL; 1589 PetscScalar *marray; 1590 1591 if (n_constraints) { 1592 Mat S_CCT,C_CRT; 1593 1594 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 1595 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 1596 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 1597 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 1598 if (n_vertices) { 1599 Mat S_VCT; 1600 1601 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 1602 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 1603 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 1604 } 1605 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 1606 } 1607 if (n_vertices && n_R) { 1608 PetscScalar *av,*marray; 1609 const PetscInt *xadj,*adjncy; 1610 PetscInt n; 1611 PetscBool flg_row; 1612 1613 /* B_V = B_V - A_VR^T */ 1614 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr); 1615 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1616 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 1617 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1618 for (i=0;i<n;i++) { 1619 PetscInt j; 1620 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 1621 } 1622 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1623 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 1624 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1625 } 1626 1627 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 1628 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 1629 for (i=0;i<n_vertices;i++) { 1630 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 1631 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1632 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1633 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1634 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1635 } 1636 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 1637 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 1638 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 1639 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 1640 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 1641 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1642 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1643 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 1644 } 1645 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 1646 /* coarse basis functions */ 1647 for (i=0;i<pcbddc->local_primal_size;i++) { 1648 PetscScalar *y; 1649 1650 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 1651 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1652 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 1653 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1654 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1655 if (i<n_vertices) { 1656 y[n_B*i+idx_V_B[i]] = 1.0; 1657 } 1658 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 1659 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 1660 1661 if (pcbddc->switch_static || pcbddc->dbg_flag) { 1662 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1663 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 1664 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1665 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 1667 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 1668 } 1669 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 1670 } 1671 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 1672 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 1673 } 1674 /* free memory */ 1675 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 1676 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 1677 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 1678 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 1679 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 1680 ierr = PetscFree(work);CHKERRQ(ierr); 1681 if (n_vertices) { 1682 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 1683 } 1684 if (n_constraints) { 1685 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 1686 } 1687 /* Checking coarse_sub_mat and coarse basis functios */ 1688 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1689 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 1690 if (pcbddc->dbg_flag) { 1691 Mat coarse_sub_mat; 1692 Mat AUXMAT,TM1,TM2,TM3,TM4; 1693 Mat coarse_phi_D,coarse_phi_B; 1694 Mat coarse_psi_D,coarse_psi_B; 1695 Mat A_II,A_BB,A_IB,A_BI; 1696 Mat C_B,CPHI; 1697 IS is_dummy; 1698 Vec mones; 1699 MatType checkmattype=MATSEQAIJ; 1700 PetscReal real_value; 1701 1702 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 1703 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 1704 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 1705 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 1706 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 1707 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 1708 if (!pcbddc->symmetric_primal) { 1709 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 1710 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 1711 } 1712 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 1713 1714 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1715 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 1716 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1717 if (!pcbddc->symmetric_primal) { 1718 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1719 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1720 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1721 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1722 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1723 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1724 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1725 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1726 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1727 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1728 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1729 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1730 } else { 1731 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 1732 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 1733 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1734 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 1735 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1736 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 1737 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 1738 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 1739 } 1740 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1741 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1742 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1743 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 1744 if (pcbddc->benign_n) { 1745 Mat B0_I,B0_B,B0_BPHI,B0_IPHI; 1746 PetscScalar *data,*data2; 1747 PetscInt j; 1748 1749 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 1750 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B); 1751 ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I); 1752 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 1753 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 1754 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 1755 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 1756 for (j=0;j<pcbddc->benign_n;j++) { 1757 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 1758 for (i=0;i<pcbddc->local_primal_size;i++) { 1759 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 1760 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 1761 } 1762 } 1763 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 1764 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 1765 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 1766 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1767 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 1768 ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr); 1769 ierr = MatDestroy(&B0_I);CHKERRQ(ierr); 1770 ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1771 ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr); 1772 } 1773 #if 0 1774 { 1775 PetscViewer viewer; 1776 char filename[256]; 1777 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 1778 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 1779 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1780 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 1781 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 1782 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 1783 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 1784 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 1785 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 1786 if (pcbddc->coarse_phi_D) { 1787 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 1788 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 1789 } 1790 if (pcbddc->coarse_psi_B) { 1791 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 1792 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 1793 } 1794 if (pcbddc->coarse_phi_B) { 1795 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 1796 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 1797 } 1798 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1799 } 1800 #endif 1801 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 1802 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1803 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 1804 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1805 1806 /* check constraints */ 1807 if (!pcbddc->benign_n) { /* TODO: add benign case */ 1808 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr); 1809 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B); 1810 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1811 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 1812 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1813 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1814 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1816 if (!pcbddc->symmetric_primal) { 1817 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 1818 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 1819 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 1820 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 1821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 1822 } 1823 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 1824 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 1825 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 1826 ierr = VecDestroy(&mones);CHKERRQ(ierr); 1827 } 1828 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1829 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 1830 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 1831 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 1832 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 1833 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 1834 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 1835 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 1836 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 1837 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 1838 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 1839 if (!pcbddc->symmetric_primal) { 1840 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 1841 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 1842 } 1843 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 1844 } 1845 /* get back data */ 1846 *coarse_submat_vals_n = coarse_submat_vals; 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatGetSubMatrixUnsorted" 1852 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 1853 { 1854 Mat *work_mat; 1855 IS isrow_s,iscol_s; 1856 PetscBool rsorted,csorted; 1857 PetscInt rsize,*idxs_perm_r,csize,*idxs_perm_c; 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 1862 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 1863 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 1864 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1865 1866 if (!rsorted) { 1867 const PetscInt *idxs; 1868 PetscInt *idxs_sorted,i; 1869 1870 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 1871 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 1872 for (i=0;i<rsize;i++) { 1873 idxs_perm_r[i] = i; 1874 } 1875 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 1876 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 1877 for (i=0;i<rsize;i++) { 1878 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 1879 } 1880 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 1881 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 1882 } else { 1883 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1884 isrow_s = isrow; 1885 } 1886 1887 if (!csorted) { 1888 if (isrow == iscol) { 1889 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 1890 iscol_s = isrow_s; 1891 } else { 1892 const PetscInt *idxs; 1893 PetscInt *idxs_sorted,i; 1894 1895 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 1896 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 1897 for (i=0;i<csize;i++) { 1898 idxs_perm_c[i] = i; 1899 } 1900 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 1901 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 1902 for (i=0;i<csize;i++) { 1903 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 1904 } 1905 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 1906 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 1907 } 1908 } else { 1909 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1910 iscol_s = iscol; 1911 } 1912 1913 ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 1914 1915 if (!rsorted || !csorted) { 1916 Mat new_mat; 1917 IS is_perm_r,is_perm_c; 1918 1919 if (!rsorted) { 1920 PetscInt *idxs_r,i; 1921 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 1922 for (i=0;i<rsize;i++) { 1923 idxs_r[idxs_perm_r[i]] = i; 1924 } 1925 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 1926 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 1927 } else { 1928 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 1929 } 1930 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 1931 1932 if (!csorted) { 1933 if (isrow_s == iscol_s) { 1934 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 1935 is_perm_c = is_perm_r; 1936 } else { 1937 PetscInt *idxs_c,i; 1938 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 1939 for (i=0;i<csize;i++) { 1940 idxs_c[idxs_perm_c[i]] = i; 1941 } 1942 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 1943 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 1944 } 1945 } else { 1946 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 1947 } 1948 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 1949 1950 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 1951 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 1952 work_mat[0] = new_mat; 1953 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 1954 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 1955 } 1956 1957 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 1958 *B = work_mat[0]; 1959 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 1960 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 1961 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNCT__ 1966 #define __FUNCT__ "PCBDDCComputeLocalMatrix" 1967 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 1968 { 1969 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1970 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1971 Mat new_mat; 1972 IS is_local,is_global; 1973 PetscInt local_size; 1974 PetscBool isseqaij; 1975 PetscErrorCode ierr; 1976 1977 PetscFunctionBegin; 1978 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 1979 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 1980 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 1981 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 1982 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 1983 ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 1984 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 1985 1986 /* check */ 1987 if (pcbddc->dbg_flag) { 1988 Vec x,x_change; 1989 PetscReal error; 1990 1991 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 1992 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 1993 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 1994 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1995 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1996 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 1997 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1998 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1999 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 2000 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 2001 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2002 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr); 2003 ierr = VecDestroy(&x);CHKERRQ(ierr); 2004 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 2005 } 2006 2007 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 2008 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2009 if (isseqaij) { 2010 Mat M; 2011 2012 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2013 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2014 ierr = MatDestroy(&M);CHKERRQ(ierr); 2015 } else { 2016 Mat work_mat,M; 2017 2018 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 2019 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2020 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2021 ierr = MatDestroy(&M);CHKERRQ(ierr); 2022 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 2023 } 2024 if (matis->A->symmetric_set) { 2025 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 2026 #if !defined(PETSC_USE_COMPLEX) 2027 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 2028 #endif 2029 } 2030 /* 2031 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2032 ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr); 2033 */ 2034 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 #undef __FUNCT__ 2039 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 2040 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 2041 { 2042 PC_IS* pcis = (PC_IS*)(pc->data); 2043 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2044 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2045 PetscInt *idx_R_local=NULL; 2046 PetscInt n_vertices,i,j,n_R,n_D,n_B; 2047 PetscInt vbs,bs; 2048 PetscBT bitmask=NULL; 2049 PetscErrorCode ierr; 2050 2051 PetscFunctionBegin; 2052 /* 2053 No need to setup local scatters if 2054 - primal space is unchanged 2055 AND 2056 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 2057 AND 2058 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 2059 */ 2060 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 2061 PetscFunctionReturn(0); 2062 } 2063 /* destroy old objects */ 2064 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 2065 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 2066 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 2067 /* Set Non-overlapping dimensions */ 2068 n_B = pcis->n_B; 2069 n_D = pcis->n - n_B; 2070 n_vertices = pcbddc->n_vertices; 2071 2072 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 2073 2074 /* create auxiliary bitmask and allocate workspace */ 2075 if (!sub_schurs->reuse_mumps) { 2076 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 2077 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 2078 for (i=0;i<n_vertices;i++) { 2079 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 2080 } 2081 2082 for (i=0, n_R=0; i<pcis->n; i++) { 2083 if (!PetscBTLookup(bitmask,i)) { 2084 idx_R_local[n_R++] = i; 2085 } 2086 } 2087 } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */ 2088 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2089 2090 ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2091 ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr); 2092 } 2093 2094 /* Block code */ 2095 vbs = 1; 2096 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 2097 if (bs>1 && !(n_vertices%bs)) { 2098 PetscBool is_blocked = PETSC_TRUE; 2099 PetscInt *vary; 2100 if (!sub_schurs->reuse_mumps) { 2101 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 2102 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 2103 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 2104 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 2105 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 2106 for (i=0; i<pcis->n/bs; i++) { 2107 if (vary[i]!=0 && vary[i]!=bs) { 2108 is_blocked = PETSC_FALSE; 2109 break; 2110 } 2111 } 2112 ierr = PetscFree(vary);CHKERRQ(ierr); 2113 } else { 2114 /* Verify directly the R set */ 2115 for (i=0; i<n_R/bs; i++) { 2116 PetscInt j,node=idx_R_local[bs*i]; 2117 for (j=1; j<bs; j++) { 2118 if (node != idx_R_local[bs*i+j]-j) { 2119 is_blocked = PETSC_FALSE; 2120 break; 2121 } 2122 } 2123 } 2124 } 2125 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 2126 vbs = bs; 2127 for (i=0;i<n_R/vbs;i++) { 2128 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 2129 } 2130 } 2131 } 2132 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 2133 if (sub_schurs->reuse_mumps) { 2134 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2135 2136 ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2137 ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr); 2138 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 2139 reuse_mumps->is_R = pcbddc->is_R_local; 2140 } else { 2141 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 2142 } 2143 2144 /* print some info if requested */ 2145 if (pcbddc->dbg_flag) { 2146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2148 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2149 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 2150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 2151 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 2152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2153 } 2154 2155 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 2156 if (!sub_schurs->reuse_mumps) { 2157 IS is_aux1,is_aux2; 2158 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 2159 2160 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2161 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 2162 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 2163 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2164 for (i=0; i<n_D; i++) { 2165 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 2166 } 2167 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2168 for (i=0, j=0; i<n_R; i++) { 2169 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 2170 aux_array1[j++] = i; 2171 } 2172 } 2173 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2174 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2175 for (i=0, j=0; i<n_B; i++) { 2176 if (!PetscBTLookup(bitmask,is_indices[i])) { 2177 aux_array2[j++] = i; 2178 } 2179 } 2180 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2181 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 2182 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 2183 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2184 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 2185 2186 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2187 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 2188 for (i=0, j=0; i<n_R; i++) { 2189 if (PetscBTLookup(bitmask,idx_R_local[i])) { 2190 aux_array1[j++] = i; 2191 } 2192 } 2193 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 2194 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2195 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 2196 } 2197 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 2198 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 2199 } else { 2200 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2201 IS tis; 2202 PetscInt schur_size; 2203 2204 ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr); 2205 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 2206 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 2207 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2208 if (pcbddc->switch_static || pcbddc->dbg_flag) { 2209 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 2210 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 2211 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2212 } 2213 } 2214 PetscFunctionReturn(0); 2215 } 2216 2217 2218 #undef __FUNCT__ 2219 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 2220 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 2221 { 2222 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2223 PC_IS *pcis = (PC_IS*)pc->data; 2224 PC pc_temp; 2225 Mat A_RR; 2226 MatReuse reuse; 2227 PetscScalar m_one = -1.0; 2228 PetscReal value; 2229 PetscInt n_D,n_R; 2230 PetscBool use_exact,use_exact_reduced,issbaij; 2231 PetscErrorCode ierr; 2232 /* prefixes stuff */ 2233 char dir_prefix[256],neu_prefix[256],str_level[16]; 2234 size_t len; 2235 2236 PetscFunctionBegin; 2237 2238 /* compute prefixes */ 2239 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 2240 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 2241 if (!pcbddc->current_level) { 2242 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2243 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 2244 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2245 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2246 } else { 2247 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 2248 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 2249 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 2250 len -= 15; /* remove "pc_bddc_coarse_" */ 2251 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 2252 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 2253 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2254 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 2255 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 2256 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 2257 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 2258 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 2259 } 2260 2261 /* DIRICHLET PROBLEM */ 2262 if (dirichlet) { 2263 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2264 if (pcbddc->local_mat->symmetric_set) { 2265 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2266 } 2267 /* Matrix for Dirichlet problem is pcis->A_II */ 2268 n_D = pcis->n - pcis->n_B; 2269 if (!pcbddc->ksp_D) { /* create object if not yet build */ 2270 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 2271 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 2272 /* default */ 2273 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 2274 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 2275 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2276 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2277 if (issbaij) { 2278 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2279 } else { 2280 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2281 } 2282 /* Allow user's customization */ 2283 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 2284 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2285 } 2286 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 2287 if (sub_schurs->reuse_mumps) { 2288 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2289 2290 ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr); 2291 } 2292 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2293 if (!n_D) { 2294 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 2295 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2296 } 2297 /* Set Up KSP for Dirichlet problem of BDDC */ 2298 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 2299 /* set ksp_D into pcis data */ 2300 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 2301 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 2302 pcis->ksp_D = pcbddc->ksp_D; 2303 } 2304 2305 /* NEUMANN PROBLEM */ 2306 A_RR = 0; 2307 if (neumann) { 2308 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2309 PetscInt ibs,mbs; 2310 PetscBool issbaij; 2311 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2312 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 2313 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 2314 if (pcbddc->ksp_R) { /* already created ksp */ 2315 PetscInt nn_R; 2316 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 2317 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 2318 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 2319 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 2320 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 2321 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2322 reuse = MAT_INITIAL_MATRIX; 2323 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 2324 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 2325 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2326 reuse = MAT_INITIAL_MATRIX; 2327 } else { /* safe to reuse the matrix */ 2328 reuse = MAT_REUSE_MATRIX; 2329 } 2330 } 2331 /* last check */ 2332 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 2333 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2334 reuse = MAT_INITIAL_MATRIX; 2335 } 2336 } else { /* first time, so we need to create the matrix */ 2337 reuse = MAT_INITIAL_MATRIX; 2338 } 2339 /* extract A_RR */ 2340 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 2341 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 2342 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2343 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 2344 if (matis->A == pcbddc->local_mat) { 2345 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2346 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2347 } else { 2348 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2349 } 2350 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 2351 if (matis->A == pcbddc->local_mat) { 2352 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2353 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2354 } else { 2355 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 2356 } 2357 } 2358 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 2359 if (pcbddc->local_mat->symmetric_set) { 2360 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 2361 } 2362 if (!pcbddc->ksp_R) { /* create object if not present */ 2363 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 2364 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 2365 /* default */ 2366 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 2367 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 2368 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2369 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 2370 if (issbaij) { 2371 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 2372 } else { 2373 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 2374 } 2375 /* Allow user's customization */ 2376 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 2377 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 2378 } 2379 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 2380 if (!n_R) { 2381 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 2382 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 2383 } 2384 /* Reuse MUMPS solver if it is present */ 2385 if (sub_schurs->reuse_mumps) { 2386 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2387 2388 ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr); 2389 } 2390 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 2391 /* Set Up KSP for Neumann problem of BDDC */ 2392 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 2393 } 2394 /* free Neumann problem's matrix */ 2395 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 2396 2397 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 2398 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 2399 if (pcbddc->dbg_flag) { 2400 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2401 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2402 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2403 } 2404 if (dirichlet) { /* Dirichlet */ 2405 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 2406 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 2407 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 2408 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 2409 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 2410 /* need to be adapted? */ 2411 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2412 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2413 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 2414 /* print info */ 2415 if (pcbddc->dbg_flag) { 2416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 2417 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2418 } 2419 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 2420 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr); 2421 } 2422 } 2423 if (neumann) { /* Neumann */ 2424 ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr); 2425 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 2426 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 2427 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 2428 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 2429 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 2430 /* need to be adapted? */ 2431 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 2432 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2433 /* print info */ 2434 if (pcbddc->dbg_flag) { 2435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 2436 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2437 } 2438 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 2439 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr); 2440 } 2441 } 2442 } 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection" 2448 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 2449 { 2450 PetscErrorCode ierr; 2451 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2452 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2453 2454 PetscFunctionBegin; 2455 if (!sub_schurs->reuse_mumps) { 2456 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 2457 } 2458 if (!pcbddc->switch_static) { 2459 if (applytranspose && pcbddc->local_auxmat1) { 2460 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2461 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2462 } 2463 if (!sub_schurs->reuse_mumps) { 2464 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2465 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2466 } else { 2467 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2468 2469 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2470 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2471 } 2472 } else { 2473 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2474 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2475 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2476 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2477 if (applytranspose && pcbddc->local_auxmat1) { 2478 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 2479 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2480 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2481 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2482 } 2483 } 2484 if (!sub_schurs->reuse_mumps) { 2485 if (applytranspose) { 2486 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2487 } else { 2488 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2489 } 2490 } else { 2491 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2492 2493 if (applytranspose) { 2494 ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2495 } else { 2496 ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr); 2497 } 2498 } 2499 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 2500 if (!pcbddc->switch_static) { 2501 if (!sub_schurs->reuse_mumps) { 2502 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2503 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2504 } else { 2505 PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps; 2506 2507 ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2508 ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2509 } 2510 if (!applytranspose && pcbddc->local_auxmat1) { 2511 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2512 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 2513 } 2514 } else { 2515 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2516 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2517 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2518 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2519 if (!applytranspose && pcbddc->local_auxmat1) { 2520 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 2521 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 2522 } 2523 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2524 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2525 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2526 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2527 } 2528 PetscFunctionReturn(0); 2529 } 2530 2531 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 2532 #undef __FUNCT__ 2533 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 2534 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 2535 { 2536 PetscErrorCode ierr; 2537 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2538 PC_IS* pcis = (PC_IS*) (pc->data); 2539 const PetscScalar zero = 0.0; 2540 2541 PetscFunctionBegin; 2542 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 2543 if (applytranspose) { 2544 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2545 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2546 } else { 2547 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 2548 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 2549 } 2550 2551 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 2552 if (pcbddc->benign_n) { 2553 PetscScalar *array; 2554 PetscInt j; 2555 2556 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2557 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 2558 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2559 } 2560 2561 /* start communications from local primal nodes to rhs of coarse solver */ 2562 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 2563 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2564 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2565 2566 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 2567 /* TODO remove null space when doing multilevel */ 2568 if (pcbddc->coarse_ksp) { 2569 Mat coarse_mat; 2570 Vec rhs,sol; 2571 MatNullSpace nullsp; 2572 2573 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 2574 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 2575 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 2576 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 2577 if (nullsp) { 2578 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 2579 } 2580 if (applytranspose) { 2581 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2582 } else { 2583 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 2584 } 2585 if (nullsp) { 2586 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 2587 } 2588 } 2589 2590 /* Local solution on R nodes */ 2591 if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */ 2592 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 2593 } 2594 2595 /* communications from coarse sol to local primal nodes */ 2596 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2597 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2598 2599 /* Sum contributions from two levels */ 2600 if (applytranspose) { 2601 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2602 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2603 } else { 2604 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 2605 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 2606 } 2607 /* store p0 */ 2608 if (pcbddc->benign_n) { 2609 PetscScalar *array; 2610 PetscInt j; 2611 2612 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2613 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 2614 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 2615 } 2616 PetscFunctionReturn(0); 2617 } 2618 2619 #undef __FUNCT__ 2620 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 2621 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 2622 { 2623 PetscErrorCode ierr; 2624 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2625 PetscScalar *array; 2626 Vec from,to; 2627 2628 PetscFunctionBegin; 2629 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2630 from = pcbddc->coarse_vec; 2631 to = pcbddc->vec1_P; 2632 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2633 Vec tvec; 2634 2635 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2636 ierr = VecResetArray(tvec);CHKERRQ(ierr); 2637 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2638 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 2639 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 2640 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 2641 } 2642 } else { /* from local to global -> put data in coarse right hand side */ 2643 from = pcbddc->vec1_P; 2644 to = pcbddc->coarse_vec; 2645 } 2646 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2647 PetscFunctionReturn(0); 2648 } 2649 2650 #undef __FUNCT__ 2651 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 2652 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 2653 { 2654 PetscErrorCode ierr; 2655 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 2656 PetscScalar *array; 2657 Vec from,to; 2658 2659 PetscFunctionBegin; 2660 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 2661 from = pcbddc->coarse_vec; 2662 to = pcbddc->vec1_P; 2663 } else { /* from local to global -> put data in coarse right hand side */ 2664 from = pcbddc->vec1_P; 2665 to = pcbddc->coarse_vec; 2666 } 2667 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 2668 if (smode == SCATTER_FORWARD) { 2669 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 2670 Vec tvec; 2671 2672 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 2673 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 2674 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 2675 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 2676 } 2677 } else { 2678 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 2679 ierr = VecResetArray(from);CHKERRQ(ierr); 2680 } 2681 } 2682 PetscFunctionReturn(0); 2683 } 2684 2685 /* uncomment for testing purposes */ 2686 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 2687 #undef __FUNCT__ 2688 #define __FUNCT__ "PCBDDCConstraintsSetUp" 2689 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 2690 { 2691 PetscErrorCode ierr; 2692 PC_IS* pcis = (PC_IS*)(pc->data); 2693 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2694 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 2695 /* one and zero */ 2696 PetscScalar one=1.0,zero=0.0; 2697 /* space to store constraints and their local indices */ 2698 PetscScalar *constraints_data; 2699 PetscInt *constraints_idxs,*constraints_idxs_B; 2700 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 2701 PetscInt *constraints_n; 2702 /* iterators */ 2703 PetscInt i,j,k,total_counts,total_counts_cc,cum; 2704 /* BLAS integers */ 2705 PetscBLASInt lwork,lierr; 2706 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 2707 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 2708 /* reuse */ 2709 PetscInt olocal_primal_size,olocal_primal_size_cc; 2710 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 2711 /* change of basis */ 2712 PetscBool qr_needed; 2713 PetscBT change_basis,qr_needed_idx; 2714 /* auxiliary stuff */ 2715 PetscInt *nnz,*is_indices; 2716 PetscInt ncc; 2717 /* some quantities */ 2718 PetscInt n_vertices,total_primal_vertices,valid_constraints; 2719 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 2720 2721 PetscFunctionBegin; 2722 /* Destroy Mat objects computed previously */ 2723 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 2724 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 2725 /* save info on constraints from previous setup (if any) */ 2726 olocal_primal_size = pcbddc->local_primal_size; 2727 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 2728 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 2729 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2730 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 2731 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 2732 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2733 2734 /* print some info */ 2735 if (pcbddc->dbg_flag) { 2736 IS vertices; 2737 PetscInt nv,nedges,nfaces; 2738 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 2739 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 2740 ierr = ISDestroy(&vertices);CHKERRQ(ierr); 2741 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2742 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 2743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 2744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 2745 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 2746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2747 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2748 } 2749 2750 if (!pcbddc->adaptive_selection) { 2751 IS ISForVertices,*ISForFaces,*ISForEdges; 2752 MatNullSpace nearnullsp; 2753 const Vec *nearnullvecs; 2754 Vec *localnearnullsp; 2755 PetscScalar *array; 2756 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 2757 PetscBool nnsp_has_cnst; 2758 /* LAPACK working arrays for SVD or POD */ 2759 PetscBool skip_lapack,boolforchange; 2760 PetscScalar *work; 2761 PetscReal *singular_vals; 2762 #if defined(PETSC_USE_COMPLEX) 2763 PetscReal *rwork; 2764 #endif 2765 #if defined(PETSC_MISSING_LAPACK_GESVD) 2766 PetscScalar *temp_basis,*correlation_mat; 2767 #else 2768 PetscBLASInt dummy_int=1; 2769 PetscScalar dummy_scalar=1.; 2770 #endif 2771 2772 /* Get index sets for faces, edges and vertices from graph */ 2773 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 2774 /* free unneeded index sets */ 2775 if (!pcbddc->use_vertices) { 2776 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 2777 } 2778 if (!pcbddc->use_edges) { 2779 for (i=0;i<n_ISForEdges;i++) { 2780 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 2781 } 2782 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 2783 n_ISForEdges = 0; 2784 } 2785 if (!pcbddc->use_faces) { 2786 for (i=0;i<n_ISForFaces;i++) { 2787 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 2788 } 2789 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 2790 n_ISForFaces = 0; 2791 } 2792 2793 #if defined(PETSC_USE_DEBUG) 2794 /* HACK: when solving singular problems not using vertices, a change of basis is mandatory. 2795 Also use_change_of_basis should be consistent among processors */ 2796 if (pcbddc->NullSpace) { 2797 PetscBool tbool[2],gbool[2]; 2798 2799 if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) { 2800 pcbddc->use_change_of_basis = PETSC_TRUE; 2801 if (!ISForEdges) { 2802 pcbddc->use_change_on_faces = PETSC_TRUE; 2803 } 2804 } 2805 tbool[0] = pcbddc->use_change_of_basis; 2806 tbool[1] = pcbddc->use_change_on_faces; 2807 ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2808 pcbddc->use_change_of_basis = gbool[0]; 2809 pcbddc->use_change_on_faces = gbool[1]; 2810 } 2811 #endif 2812 2813 /* check if near null space is attached to global mat */ 2814 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 2815 if (nearnullsp) { 2816 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 2817 /* remove any stored info */ 2818 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 2819 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2820 /* store information for BDDC solver reuse */ 2821 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 2822 pcbddc->onearnullspace = nearnullsp; 2823 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 2824 for (i=0;i<nnsp_size;i++) { 2825 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 2826 } 2827 } else { /* if near null space is not provided BDDC uses constants by default */ 2828 nnsp_size = 0; 2829 nnsp_has_cnst = PETSC_TRUE; 2830 } 2831 /* get max number of constraints on a single cc */ 2832 max_constraints = nnsp_size; 2833 if (nnsp_has_cnst) max_constraints++; 2834 2835 /* 2836 Evaluate maximum storage size needed by the procedure 2837 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 2838 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 2839 There can be multiple constraints per connected component 2840 */ 2841 n_vertices = 0; 2842 if (ISForVertices) { 2843 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 2844 } 2845 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 2846 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 2847 2848 total_counts = n_ISForFaces+n_ISForEdges; 2849 total_counts *= max_constraints; 2850 total_counts += n_vertices; 2851 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 2852 2853 total_counts = 0; 2854 max_size_of_constraint = 0; 2855 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 2856 IS used_is; 2857 if (i<n_ISForEdges) { 2858 used_is = ISForEdges[i]; 2859 } else { 2860 used_is = ISForFaces[i-n_ISForEdges]; 2861 } 2862 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 2863 total_counts += j; 2864 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 2865 } 2866 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 2867 2868 /* get local part of global near null space vectors */ 2869 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 2870 for (k=0;k<nnsp_size;k++) { 2871 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 2872 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2873 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2874 } 2875 2876 /* whether or not to skip lapack calls */ 2877 skip_lapack = PETSC_TRUE; 2878 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 2879 2880 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 2881 if (!skip_lapack) { 2882 PetscScalar temp_work; 2883 2884 #if defined(PETSC_MISSING_LAPACK_GESVD) 2885 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 2886 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 2887 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 2888 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 2889 #if defined(PETSC_USE_COMPLEX) 2890 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 2891 #endif 2892 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2893 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 2894 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 2895 lwork = -1; 2896 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2897 #if !defined(PETSC_USE_COMPLEX) 2898 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 2899 #else 2900 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 2901 #endif 2902 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2903 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 2904 #else /* on missing GESVD */ 2905 /* SVD */ 2906 PetscInt max_n,min_n; 2907 max_n = max_size_of_constraint; 2908 min_n = max_constraints; 2909 if (max_size_of_constraint < max_constraints) { 2910 min_n = max_size_of_constraint; 2911 max_n = max_constraints; 2912 } 2913 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 2914 #if defined(PETSC_USE_COMPLEX) 2915 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 2916 #endif 2917 /* now we evaluate the optimal workspace using query with lwork=-1 */ 2918 lwork = -1; 2919 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 2920 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 2921 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 2922 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2923 #if !defined(PETSC_USE_COMPLEX) 2924 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 2925 #else 2926 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 2927 #endif 2928 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2929 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 2930 #endif /* on missing GESVD */ 2931 /* Allocate optimal workspace */ 2932 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 2933 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 2934 } 2935 /* Now we can loop on constraining sets */ 2936 total_counts = 0; 2937 constraints_idxs_ptr[0] = 0; 2938 constraints_data_ptr[0] = 0; 2939 /* vertices */ 2940 if (n_vertices) { 2941 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2942 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 2943 for (i=0;i<n_vertices;i++) { 2944 constraints_n[total_counts] = 1; 2945 constraints_data[total_counts] = 1.0; 2946 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 2947 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 2948 total_counts++; 2949 } 2950 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2951 n_vertices = total_counts; 2952 } 2953 2954 /* edges and faces */ 2955 total_counts_cc = total_counts; 2956 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 2957 IS used_is; 2958 PetscBool idxs_copied = PETSC_FALSE; 2959 2960 if (ncc<n_ISForEdges) { 2961 used_is = ISForEdges[ncc]; 2962 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 2963 } else { 2964 used_is = ISForFaces[ncc-n_ISForEdges]; 2965 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 2966 } 2967 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 2968 2969 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 2970 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2971 /* change of basis should not be performed on local periodic nodes */ 2972 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 2973 if (nnsp_has_cnst) { 2974 PetscScalar quad_value; 2975 2976 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 2977 idxs_copied = PETSC_TRUE; 2978 2979 if (!pcbddc->use_nnsp_true) { 2980 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 2981 } else { 2982 quad_value = 1.0; 2983 } 2984 for (j=0;j<size_of_constraint;j++) { 2985 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 2986 } 2987 temp_constraints++; 2988 total_counts++; 2989 } 2990 for (k=0;k<nnsp_size;k++) { 2991 PetscReal real_value; 2992 PetscScalar *ptr_to_data; 2993 2994 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 2995 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 2996 for (j=0;j<size_of_constraint;j++) { 2997 ptr_to_data[j] = array[is_indices[j]]; 2998 } 2999 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 3000 /* check if array is null on the connected component */ 3001 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3002 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 3003 if (real_value > 0.0) { /* keep indices and values */ 3004 temp_constraints++; 3005 total_counts++; 3006 if (!idxs_copied) { 3007 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 3008 idxs_copied = PETSC_TRUE; 3009 } 3010 } 3011 } 3012 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 3013 valid_constraints = temp_constraints; 3014 if (!pcbddc->use_nnsp_true && temp_constraints) { 3015 if (temp_constraints == 1) { /* just normalize the constraint */ 3016 PetscScalar norm,*ptr_to_data; 3017 3018 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3019 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3020 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 3021 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 3022 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 3023 } else { /* perform SVD */ 3024 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 3025 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 3026 3027 #if defined(PETSC_MISSING_LAPACK_GESVD) 3028 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 3029 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 3030 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 3031 the constraints basis will differ (by a complex factor with absolute value equal to 1) 3032 from that computed using LAPACKgesvd 3033 -> This is due to a different computation of eigenvectors in LAPACKheev 3034 -> The quality of the POD-computed basis will be the same */ 3035 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3036 /* Store upper triangular part of correlation matrix */ 3037 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3038 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3039 for (j=0;j<temp_constraints;j++) { 3040 for (k=0;k<j+1;k++) { 3041 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 3042 } 3043 } 3044 /* compute eigenvalues and eigenvectors of correlation matrix */ 3045 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3046 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 3047 #if !defined(PETSC_USE_COMPLEX) 3048 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 3049 #else 3050 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 3051 #endif 3052 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3053 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 3054 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 3055 j = 0; 3056 while (j < temp_constraints && singular_vals[j] < tol) j++; 3057 total_counts = total_counts-j; 3058 valid_constraints = temp_constraints-j; 3059 /* scale and copy POD basis into used quadrature memory */ 3060 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3061 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3062 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 3063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3064 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 3065 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3066 if (j<temp_constraints) { 3067 PetscInt ii; 3068 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 3069 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3070 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 3071 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3072 for (k=0;k<temp_constraints-j;k++) { 3073 for (ii=0;ii<size_of_constraint;ii++) { 3074 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 3075 } 3076 } 3077 } 3078 #else /* on missing GESVD */ 3079 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3080 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 3081 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3082 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3083 #if !defined(PETSC_USE_COMPLEX) 3084 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 3085 #else 3086 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 3087 #endif 3088 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 3089 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3090 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 3091 k = temp_constraints; 3092 if (k > size_of_constraint) k = size_of_constraint; 3093 j = 0; 3094 while (j < k && singular_vals[k-j-1] < tol) j++; 3095 valid_constraints = k-j; 3096 total_counts = total_counts-temp_constraints+valid_constraints; 3097 #endif /* on missing GESVD */ 3098 } 3099 } 3100 /* update pointers information */ 3101 if (valid_constraints) { 3102 constraints_n[total_counts_cc] = valid_constraints; 3103 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 3104 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 3105 /* set change_of_basis flag */ 3106 if (boolforchange) { 3107 PetscBTSet(change_basis,total_counts_cc); 3108 } 3109 total_counts_cc++; 3110 } 3111 } 3112 /* free workspace */ 3113 if (!skip_lapack) { 3114 ierr = PetscFree(work);CHKERRQ(ierr); 3115 #if defined(PETSC_USE_COMPLEX) 3116 ierr = PetscFree(rwork);CHKERRQ(ierr); 3117 #endif 3118 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 3119 #if defined(PETSC_MISSING_LAPACK_GESVD) 3120 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 3121 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 3122 #endif 3123 } 3124 for (k=0;k<nnsp_size;k++) { 3125 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 3126 } 3127 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 3128 /* free index sets of faces, edges and vertices */ 3129 for (i=0;i<n_ISForFaces;i++) { 3130 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 3131 } 3132 if (n_ISForFaces) { 3133 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 3134 } 3135 for (i=0;i<n_ISForEdges;i++) { 3136 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 3137 } 3138 if (n_ISForEdges) { 3139 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 3140 } 3141 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 3142 } else { 3143 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3144 3145 total_counts = 0; 3146 n_vertices = 0; 3147 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3148 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 3149 } 3150 max_constraints = 0; 3151 total_counts_cc = 0; 3152 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3153 total_counts += pcbddc->adaptive_constraints_n[i]; 3154 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 3155 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 3156 } 3157 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 3158 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 3159 constraints_idxs = pcbddc->adaptive_constraints_idxs; 3160 constraints_data = pcbddc->adaptive_constraints_data; 3161 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 3162 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 3163 total_counts_cc = 0; 3164 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 3165 if (pcbddc->adaptive_constraints_n[i]) { 3166 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 3167 } 3168 } 3169 #if 0 3170 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 3171 for (i=0;i<total_counts_cc;i++) { 3172 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 3173 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 3174 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 3175 printf(" %d",constraints_idxs[j]); 3176 } 3177 printf("\n"); 3178 printf("number of cc: %d\n",constraints_n[i]); 3179 } 3180 for (i=0;i<n_vertices;i++) { 3181 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 3182 } 3183 for (i=0;i<sub_schurs->n_subs;i++) { 3184 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 3185 } 3186 #endif 3187 3188 max_size_of_constraint = 0; 3189 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 3190 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 3191 /* Change of basis */ 3192 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 3193 if (pcbddc->use_change_of_basis) { 3194 for (i=0;i<sub_schurs->n_subs;i++) { 3195 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 3196 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 3197 } 3198 } 3199 } 3200 } 3201 pcbddc->local_primal_size = total_counts; 3202 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3203 3204 /* map constraints_idxs in boundary numbering */ 3205 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 3206 if (i != constraints_idxs_ptr[total_counts_cc]) { 3207 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i); 3208 } 3209 3210 /* Create constraint matrix */ 3211 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3212 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 3213 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 3214 3215 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 3216 /* determine if a QR strategy is needed for change of basis */ 3217 qr_needed = PETSC_FALSE; 3218 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 3219 total_primal_vertices=0; 3220 pcbddc->local_primal_size_cc = 0; 3221 for (i=0;i<total_counts_cc;i++) { 3222 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3223 if (size_of_constraint == 1) { 3224 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 3225 pcbddc->local_primal_size_cc += 1; 3226 } else if (PetscBTLookup(change_basis,i)) { 3227 for (k=0;k<constraints_n[i];k++) { 3228 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3229 } 3230 pcbddc->local_primal_size_cc += constraints_n[i]; 3231 if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) { 3232 PetscBTSet(qr_needed_idx,i); 3233 qr_needed = PETSC_TRUE; 3234 } 3235 } else { 3236 pcbddc->local_primal_size_cc += 1; 3237 } 3238 } 3239 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 3240 pcbddc->n_vertices = total_primal_vertices; 3241 /* permute indices in order to have a sorted set of vertices */ 3242 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3243 3244 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3245 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 3246 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 3247 3248 /* nonzero structure of constraint matrix */ 3249 /* and get reference dof for local constraints */ 3250 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 3251 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 3252 3253 j = total_primal_vertices; 3254 total_counts = total_primal_vertices; 3255 cum = total_primal_vertices; 3256 for (i=n_vertices;i<total_counts_cc;i++) { 3257 if (!PetscBTLookup(change_basis,i)) { 3258 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 3259 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 3260 cum++; 3261 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3262 for (k=0;k<constraints_n[i];k++) { 3263 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 3264 nnz[j+k] = size_of_constraint; 3265 } 3266 j += constraints_n[i]; 3267 } 3268 } 3269 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 3270 ierr = PetscFree(nnz);CHKERRQ(ierr); 3271 3272 /* set values in constraint matrix */ 3273 for (i=0;i<total_primal_vertices;i++) { 3274 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3275 } 3276 total_counts = total_primal_vertices; 3277 for (i=n_vertices;i<total_counts_cc;i++) { 3278 if (!PetscBTLookup(change_basis,i)) { 3279 PetscInt *cols; 3280 3281 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3282 cols = constraints_idxs+constraints_idxs_ptr[i]; 3283 for (k=0;k<constraints_n[i];k++) { 3284 PetscInt row = total_counts+k; 3285 PetscScalar *vals; 3286 3287 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 3288 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 3289 } 3290 total_counts += constraints_n[i]; 3291 } 3292 } 3293 /* assembling */ 3294 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3295 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3296 3297 /* 3298 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 3299 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 3300 */ 3301 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 3302 if (pcbddc->use_change_of_basis) { 3303 /* dual and primal dofs on a single cc */ 3304 PetscInt dual_dofs,primal_dofs; 3305 /* working stuff for GEQRF */ 3306 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 3307 PetscBLASInt lqr_work; 3308 /* working stuff for UNGQR */ 3309 PetscScalar *gqr_work,lgqr_work_t; 3310 PetscBLASInt lgqr_work; 3311 /* working stuff for TRTRS */ 3312 PetscScalar *trs_rhs; 3313 PetscBLASInt Blas_NRHS; 3314 /* pointers for values insertion into change of basis matrix */ 3315 PetscInt *start_rows,*start_cols; 3316 PetscScalar *start_vals; 3317 /* working stuff for values insertion */ 3318 PetscBT is_primal; 3319 PetscInt *aux_primal_numbering_B; 3320 /* matrix sizes */ 3321 PetscInt global_size,local_size; 3322 /* temporary change of basis */ 3323 Mat localChangeOfBasisMatrix; 3324 /* extra space for debugging */ 3325 PetscScalar *dbg_work; 3326 3327 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 3328 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 3329 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3330 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 3331 /* nonzeros for local mat */ 3332 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 3333 for (i=0;i<pcis->n;i++) nnz[i]=1; 3334 for (i=n_vertices;i<total_counts_cc;i++) { 3335 if (PetscBTLookup(change_basis,i)) { 3336 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 3337 if (PetscBTLookup(qr_needed_idx,i)) { 3338 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 3339 } else { 3340 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 3341 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 3342 } 3343 } 3344 } 3345 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 3346 ierr = PetscFree(nnz);CHKERRQ(ierr); 3347 /* Set initial identity in the matrix */ 3348 for (i=0;i<pcis->n;i++) { 3349 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 3350 } 3351 3352 if (pcbddc->dbg_flag) { 3353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 3354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3355 } 3356 3357 3358 /* Now we loop on the constraints which need a change of basis */ 3359 /* 3360 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 3361 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 3362 3363 Basic blocks of change of basis matrix T computed by 3364 3365 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 3366 3367 | 1 0 ... 0 s_1/S | 3368 | 0 1 ... 0 s_2/S | 3369 | ... | 3370 | 0 ... 1 s_{n-1}/S | 3371 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 3372 3373 with S = \sum_{i=1}^n s_i^2 3374 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 3375 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 3376 3377 - QR decomposition of constraints otherwise 3378 */ 3379 if (qr_needed) { 3380 /* space to store Q */ 3381 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 3382 /* first we issue queries for optimal work */ 3383 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3384 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 3385 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3386 lqr_work = -1; 3387 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 3388 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 3389 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 3390 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 3391 lgqr_work = -1; 3392 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 3393 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 3394 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 3395 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3396 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 3397 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 3398 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 3399 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 3400 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 3401 /* array to store scaling factors for reflectors */ 3402 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 3403 /* array to store rhs and solution of triangular solver */ 3404 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 3405 /* allocating workspace for check */ 3406 if (pcbddc->dbg_flag) { 3407 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 3408 } 3409 } 3410 /* array to store whether a node is primal or not */ 3411 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 3412 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 3413 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 3414 if (i != total_primal_vertices) { 3415 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 3416 } 3417 for (i=0;i<total_primal_vertices;i++) { 3418 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 3419 } 3420 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 3421 3422 /* loop on constraints and see whether or not they need a change of basis and compute it */ 3423 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 3424 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 3425 if (PetscBTLookup(change_basis,total_counts)) { 3426 /* get constraint info */ 3427 primal_dofs = constraints_n[total_counts]; 3428 dual_dofs = size_of_constraint-primal_dofs; 3429 3430 if (pcbddc->dbg_flag) { 3431 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 3432 } 3433 3434 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 3435 3436 /* copy quadrature constraints for change of basis check */ 3437 if (pcbddc->dbg_flag) { 3438 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3439 } 3440 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 3441 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3442 3443 /* compute QR decomposition of constraints */ 3444 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3445 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3446 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3447 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3448 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 3449 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 3450 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3451 3452 /* explictly compute R^-T */ 3453 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 3454 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 3455 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3456 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 3457 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3458 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3459 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3460 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 3461 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 3462 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3463 3464 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 3465 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3466 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3467 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3468 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3469 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3470 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 3471 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 3472 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3473 3474 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 3475 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 3476 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 3477 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 3478 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 3479 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 3480 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3481 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 3482 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 3483 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3484 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 3485 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3486 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 3487 3488 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 3489 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 3490 /* insert cols for primal dofs */ 3491 for (j=0;j<primal_dofs;j++) { 3492 start_vals = &qr_basis[j*size_of_constraint]; 3493 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3494 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3495 } 3496 /* insert cols for dual dofs */ 3497 for (j=0,k=0;j<dual_dofs;k++) { 3498 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 3499 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 3500 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3501 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 3502 j++; 3503 } 3504 } 3505 3506 /* check change of basis */ 3507 if (pcbddc->dbg_flag) { 3508 PetscInt ii,jj; 3509 PetscBool valid_qr=PETSC_TRUE; 3510 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 3511 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 3513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 3514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 3515 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 3516 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3517 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 3518 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3519 for (jj=0;jj<size_of_constraint;jj++) { 3520 for (ii=0;ii<primal_dofs;ii++) { 3521 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 3522 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 3523 } 3524 } 3525 if (!valid_qr) { 3526 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 3527 for (jj=0;jj<size_of_constraint;jj++) { 3528 for (ii=0;ii<primal_dofs;ii++) { 3529 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 3530 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3531 } 3532 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 3533 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 3534 } 3535 } 3536 } 3537 } else { 3538 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 3539 } 3540 } 3541 } else { /* simple transformation block */ 3542 PetscInt row,col; 3543 PetscScalar val,norm; 3544 3545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 3546 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 3547 for (j=0;j<size_of_constraint;j++) { 3548 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 3549 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 3550 if (!PetscBTLookup(is_primal,row_B)) { 3551 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 3552 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 3553 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 3554 } else { 3555 for (k=0;k<size_of_constraint;k++) { 3556 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 3557 if (row != col) { 3558 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 3559 } else { 3560 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 3561 } 3562 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 3563 } 3564 } 3565 } 3566 if (pcbddc->dbg_flag) { 3567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 3568 } 3569 } 3570 } else { 3571 if (pcbddc->dbg_flag) { 3572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 3573 } 3574 } 3575 } 3576 3577 /* free workspace */ 3578 if (qr_needed) { 3579 if (pcbddc->dbg_flag) { 3580 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 3581 } 3582 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 3583 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 3584 ierr = PetscFree(qr_work);CHKERRQ(ierr); 3585 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 3586 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 3587 } 3588 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 3589 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3590 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3591 3592 /* assembling of global change of variable */ 3593 { 3594 Mat tmat; 3595 PetscInt bs; 3596 3597 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3598 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3599 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 3600 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 3601 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3602 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 3603 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 3604 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 3605 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3606 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 3607 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3608 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3609 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3610 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 3611 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3612 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3613 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 3614 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 3615 } 3616 /* check */ 3617 if (pcbddc->dbg_flag) { 3618 PetscReal error; 3619 Vec x,x_change; 3620 3621 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 3622 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 3623 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 3624 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 3625 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3626 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3627 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 3628 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3629 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3630 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 3631 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 3632 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 3633 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3634 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr); 3635 ierr = VecDestroy(&x);CHKERRQ(ierr); 3636 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 3637 } 3638 3639 /* adapt sub_schurs computed (if any) */ 3640 if (pcbddc->use_deluxe_scaling) { 3641 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 3642 if (sub_schurs->S_Ej_all) { 3643 Mat S_new,tmat; 3644 ISLocalToGlobalMapping NtoSall; 3645 IS is_all_N,is_V,is_V_Sall; 3646 const PetscScalar *array; 3647 const PetscInt *idxs_V,*idxs_all; 3648 PetscInt i,n_V; 3649 3650 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 3651 ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 3652 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 3653 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 3654 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 3655 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 3656 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 3657 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 3658 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3659 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 3660 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3661 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3662 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 3663 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3664 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3665 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 3666 for (i=0;i<n_V;i++) { 3667 PetscScalar val; 3668 PetscInt idx; 3669 3670 idx = idxs_V[i]; 3671 val = array[idxs_all[idxs_V[i]]]; 3672 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 3673 } 3674 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3675 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3676 sub_schurs->S_Ej_all = S_new; 3677 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3678 if (sub_schurs->sum_S_Ej_all) { 3679 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 3680 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 3681 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 3682 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 3683 sub_schurs->sum_S_Ej_all = S_new; 3684 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 3685 } 3686 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 3687 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 3688 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 3689 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 3690 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 3691 } 3692 } 3693 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 3694 } else if (pcbddc->user_ChangeOfBasisMatrix) { 3695 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3696 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 3697 } 3698 3699 /* set up change of basis context */ 3700 if (pcbddc->ChangeOfBasisMatrix) { 3701 PCBDDCChange_ctx change_ctx; 3702 3703 if (!pcbddc->new_global_mat) { 3704 PetscInt global_size,local_size; 3705 3706 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 3707 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 3708 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr); 3709 ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 3710 ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr); 3711 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr); 3712 ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr); 3713 ierr = PetscNew(&change_ctx);CHKERRQ(ierr); 3714 ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr); 3715 } else { 3716 ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr); 3717 ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr); 3718 ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr); 3719 } 3720 if (!pcbddc->user_ChangeOfBasisMatrix) { 3721 ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3722 change_ctx->global_change = pcbddc->ChangeOfBasisMatrix; 3723 } else { 3724 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3725 change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix; 3726 } 3727 ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr); 3728 ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr); 3729 ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3730 ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3731 } else { 3732 ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr); 3733 } 3734 3735 /* add pressure dofs to set of primal nodes for numbering purposes */ 3736 for (i=0;i<pcbddc->benign_n;i++) { 3737 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 3738 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 3739 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 3740 pcbddc->local_primal_size_cc++; 3741 pcbddc->local_primal_size++; 3742 } 3743 3744 /* check if a new primal space has been introduced (also take into account benign trick) */ 3745 pcbddc->new_primal_space_local = PETSC_TRUE; 3746 if (olocal_primal_size == pcbddc->local_primal_size) { 3747 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3748 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3749 if (!pcbddc->new_primal_space_local) { 3750 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 3751 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 3752 } 3753 } 3754 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 3755 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 3756 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3757 3758 /* flush dbg viewer */ 3759 if (pcbddc->dbg_flag) { 3760 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3761 } 3762 3763 /* free workspace */ 3764 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 3765 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 3766 if (!pcbddc->adaptive_selection) { 3767 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 3768 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 3769 } else { 3770 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 3771 pcbddc->adaptive_constraints_idxs_ptr, 3772 pcbddc->adaptive_constraints_data_ptr, 3773 pcbddc->adaptive_constraints_idxs, 3774 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3775 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 3776 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 3777 } 3778 PetscFunctionReturn(0); 3779 } 3780 3781 #undef __FUNCT__ 3782 #define __FUNCT__ "PCBDDCAnalyzeInterface" 3783 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 3784 { 3785 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3786 PC_IS *pcis = (PC_IS*)pc->data; 3787 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3788 PetscInt ierr,i,vertex_size,N; 3789 PetscViewer viewer=pcbddc->dbg_viewer; 3790 3791 PetscFunctionBegin; 3792 /* Reset previously computed graph */ 3793 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 3794 /* Init local Graph struct */ 3795 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 3796 ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr); 3797 3798 /* Check validity of the csr graph passed in by the user */ 3799 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 3800 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 3801 } 3802 3803 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 3804 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 3805 PetscInt *xadj,*adjncy; 3806 PetscInt nvtxs; 3807 PetscBool flg_row=PETSC_FALSE; 3808 3809 if (pcbddc->use_local_adj) { 3810 3811 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3812 if (flg_row) { 3813 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 3814 pcbddc->computed_rowadj = PETSC_TRUE; 3815 } 3816 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3817 } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */ 3818 IS is_dummy; 3819 ISLocalToGlobalMapping l2gmap_dummy; 3820 PetscInt j,sum; 3821 PetscInt *cxadj,*cadjncy; 3822 const PetscInt *idxs; 3823 PCBDDCGraph graph; 3824 PetscBT is_on_boundary; 3825 3826 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr); 3827 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 3828 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 3829 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 3830 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr); 3831 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 3832 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3833 if (flg_row) { 3834 graph->xadj = xadj; 3835 graph->adjncy = adjncy; 3836 } 3837 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 3838 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 3839 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 3840 3841 if (pcbddc->dbg_flag) { 3842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr); 3843 for (i=0;i<graph->ncc;i++) { 3844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr); 3845 } 3846 } 3847 3848 ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr); 3849 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3850 for (i=0;i<pcis->n_B;i++) { 3851 ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr); 3852 } 3853 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 3854 3855 ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr); 3856 sum = 0; 3857 for (i=0;i<graph->ncc;i++) { 3858 PetscInt sizecc = 0; 3859 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3860 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3861 sizecc++; 3862 } 3863 } 3864 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3865 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3866 cxadj[graph->queue[j]] = sizecc; 3867 } 3868 } 3869 sum += sizecc*sizecc; 3870 } 3871 ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr); 3872 sum = 0; 3873 for (i=0;i<pcis->n;i++) { 3874 PetscInt temp = cxadj[i]; 3875 cxadj[i] = sum; 3876 sum += temp; 3877 } 3878 cxadj[pcis->n] = sum; 3879 for (i=0;i<graph->ncc;i++) { 3880 for (j=graph->cptr[i];j<graph->cptr[i+1];j++) { 3881 if (PetscBTLookup(is_on_boundary,graph->queue[j])) { 3882 PetscInt k,sizecc = 0; 3883 for (k=graph->cptr[i];k<graph->cptr[i+1];k++) { 3884 if (PetscBTLookup(is_on_boundary,graph->queue[k])) { 3885 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k]; 3886 sizecc++; 3887 } 3888 } 3889 } 3890 } 3891 } 3892 if (sum) { 3893 ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr); 3894 } else { 3895 ierr = PetscFree(cxadj);CHKERRQ(ierr); 3896 ierr = PetscFree(cadjncy);CHKERRQ(ierr); 3897 } 3898 graph->xadj = 0; 3899 graph->adjncy = 0; 3900 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 3901 ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr); 3902 } 3903 } 3904 if (pcbddc->dbg_flag) { 3905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3906 } 3907 3908 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */ 3909 vertex_size = 1; 3910 if (pcbddc->user_provided_isfordofs) { 3911 if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */ 3912 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3913 for (i=0;i<pcbddc->n_ISForDofs;i++) { 3914 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3915 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 3916 } 3917 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 3918 pcbddc->n_ISForDofs = 0; 3919 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 3920 } 3921 /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 3922 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 3923 } else { 3924 if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */ 3925 ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr); 3926 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 3927 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 3928 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 3929 } 3930 } 3931 } 3932 3933 /* Setup of Graph */ 3934 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */ 3935 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3936 } 3937 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */ 3938 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3939 } 3940 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */ 3941 ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3942 } 3943 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3944 3945 /* attach info on disconnected subdomains if present */ 3946 if (pcbddc->n_local_subs) { 3947 PetscInt *local_subs; 3948 3949 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 3950 for (i=0;i<pcbddc->n_local_subs;i++) { 3951 const PetscInt *idxs; 3952 PetscInt nl,j; 3953 3954 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 3955 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3956 for (j=0;j<nl;j++) { 3957 local_subs[idxs[j]] = i; 3958 } 3959 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 3960 } 3961 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 3962 pcbddc->mat_graph->local_subs = local_subs; 3963 } 3964 3965 /* Graph's connected components analysis */ 3966 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 3967 3968 /* print some info to stdout */ 3969 if (pcbddc->dbg_flag) { 3970 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr); 3971 } 3972 3973 /* mark topography has done */ 3974 pcbddc->recompute_topography = PETSC_FALSE; 3975 PetscFunctionReturn(0); 3976 } 3977 3978 /* given an index sets possibly with holes, renumbers the indexes removing the holes */ 3979 #undef __FUNCT__ 3980 #define __FUNCT__ "PCBDDCSubsetNumbering" 3981 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n) 3982 { 3983 PetscSF sf; 3984 PetscLayout map; 3985 const PetscInt *idxs; 3986 PetscInt *leaf_data,*root_data,*gidxs; 3987 PetscInt N,n,i,lbounds[2],gbounds[2],Nl; 3988 PetscInt n_n,nlocals,start,first_index; 3989 PetscMPIInt commsize; 3990 PetscBool first_found; 3991 PetscErrorCode ierr; 3992 3993 PetscFunctionBegin; 3994 ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr); 3995 if (subset_mult) { 3996 PetscCheckSameComm(subset,1,subset_mult,2); 3997 ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr); 3998 if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i); 3999 } 4000 /* create workspace layout for computing global indices of subset */ 4001 ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr); 4002 lbounds[0] = lbounds[1] = 0; 4003 for (i=0;i<n;i++) { 4004 if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i]; 4005 else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i]; 4006 } 4007 lbounds[0] = -lbounds[0]; 4008 ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4009 gbounds[0] = -gbounds[0]; 4010 N = gbounds[1] - gbounds[0] + 1; 4011 ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr); 4012 ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr); 4013 ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 4014 ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 4015 ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr); 4016 4017 /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */ 4018 ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr); 4019 if (subset_mult) { 4020 const PetscInt* idxs_mult; 4021 4022 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4023 ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr); 4024 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4025 } else { 4026 for (i=0;i<n;i++) leaf_data[i] = 1; 4027 } 4028 /* local size of new subset */ 4029 n_n = 0; 4030 for (i=0;i<n;i++) n_n += leaf_data[i]; 4031 4032 /* global indexes in layout */ 4033 ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */ 4034 for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0]; 4035 ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr); 4036 ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr); 4037 ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr); 4038 ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 4039 4040 /* reduce from leaves to roots */ 4041 ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr); 4042 ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4043 ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr); 4044 4045 /* count indexes in local part of layout */ 4046 nlocals = 0; 4047 first_index = -1; 4048 first_found = PETSC_FALSE; 4049 for (i=0;i<Nl;i++) { 4050 if (!first_found && root_data[i]) { 4051 first_found = PETSC_TRUE; 4052 first_index = i; 4053 } 4054 nlocals += root_data[i]; 4055 } 4056 4057 /* cumulative of number of indexes and size of subset without holes */ 4058 #if defined(PETSC_HAVE_MPI_EXSCAN) 4059 start = 0; 4060 ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4061 #else 4062 ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4063 start = start-nlocals; 4064 #endif 4065 4066 if (N_n) { /* compute total size of new subset if requested */ 4067 *N_n = start + nlocals; 4068 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr); 4069 ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr); 4070 } 4071 4072 /* adapt root data with cumulative */ 4073 if (first_found) { 4074 PetscInt old_index; 4075 4076 root_data[first_index] += start; 4077 old_index = first_index; 4078 for (i=first_index+1;i<Nl;i++) { 4079 if (root_data[i]) { 4080 root_data[i] += root_data[old_index]; 4081 old_index = i; 4082 } 4083 } 4084 } 4085 4086 /* from roots to leaves */ 4087 ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4088 ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr); 4089 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 4090 4091 /* create new IS with global indexes without holes */ 4092 if (subset_mult) { 4093 const PetscInt* idxs_mult; 4094 PetscInt cum; 4095 4096 cum = 0; 4097 ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4098 for (i=0;i<n;i++) { 4099 PetscInt j; 4100 for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j; 4101 } 4102 ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr); 4103 } else { 4104 for (i=0;i<n;i++) { 4105 gidxs[i] = leaf_data[i]-1; 4106 } 4107 } 4108 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr); 4109 ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr); 4110 PetscFunctionReturn(0); 4111 } 4112 4113 #undef __FUNCT__ 4114 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 4115 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 4116 { 4117 PetscInt i,j; 4118 PetscScalar *alphas; 4119 PetscErrorCode ierr; 4120 4121 PetscFunctionBegin; 4122 /* this implements stabilized Gram-Schmidt */ 4123 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 4124 for (i=0;i<n;i++) { 4125 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 4126 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 4127 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 4128 } 4129 ierr = PetscFree(alphas);CHKERRQ(ierr); 4130 PetscFunctionReturn(0); 4131 } 4132 4133 #undef __FUNCT__ 4134 #define __FUNCT__ "MatISGetSubassemblingPattern" 4135 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends) 4136 { 4137 IS ranks_send_to; 4138 PetscInt n_neighs,*neighs,*n_shared,**shared; 4139 PetscMPIInt size,rank,color; 4140 PetscInt *xadj,*adjncy; 4141 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 4142 PetscInt i,local_size,threshold=0; 4143 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 4144 PetscSubcomm subcomm; 4145 PetscErrorCode ierr; 4146 4147 PetscFunctionBegin; 4148 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 4149 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 4150 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 4151 4152 /* Get info on mapping */ 4153 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr); 4154 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4155 4156 /* build local CSR graph of subdomains' connectivity */ 4157 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 4158 xadj[0] = 0; 4159 xadj[1] = PetscMax(n_neighs-1,0); 4160 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 4161 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 4162 4163 if (threshold) { 4164 PetscInt xadj_count = 0; 4165 for (i=1;i<n_neighs;i++) { 4166 if (n_shared[i] > threshold) { 4167 adjncy[xadj_count] = neighs[i]; 4168 adjncy_wgt[xadj_count] = n_shared[i]; 4169 xadj_count++; 4170 } 4171 } 4172 xadj[1] = xadj_count; 4173 } else { 4174 if (xadj[1]) { 4175 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 4176 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 4177 } 4178 } 4179 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 4180 if (use_square) { 4181 for (i=0;i<xadj[1];i++) { 4182 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 4183 } 4184 } 4185 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4186 4187 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 4188 4189 /* 4190 Restrict work on active processes only. 4191 */ 4192 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 4193 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 4194 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 4195 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 4196 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4197 if (color) { 4198 ierr = PetscFree(xadj);CHKERRQ(ierr); 4199 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4200 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4201 } else { 4202 Mat subdomain_adj; 4203 IS new_ranks,new_ranks_contig; 4204 MatPartitioning partitioner; 4205 PetscInt prank,rstart=0,rend=0; 4206 PetscInt *is_indices,*oldranks; 4207 PetscBool aggregate; 4208 4209 ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr); 4210 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 4211 prank = rank; 4212 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr); 4213 /* 4214 for (i=0;i<size;i++) { 4215 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 4216 } 4217 */ 4218 for (i=0;i<xadj[1];i++) { 4219 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 4220 } 4221 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 4222 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 4223 if (aggregate) { 4224 PetscInt lrows,row,ncols,*cols; 4225 PetscMPIInt nrank; 4226 PetscScalar *vals; 4227 4228 ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr); 4229 lrows = 0; 4230 if (nrank<redprocs) { 4231 lrows = size/redprocs; 4232 if (nrank<size%redprocs) lrows++; 4233 } 4234 ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 4235 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 4236 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4237 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4238 row = nrank; 4239 ncols = xadj[1]-xadj[0]; 4240 cols = adjncy; 4241 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 4242 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 4243 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 4244 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4245 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4246 ierr = PetscFree(xadj);CHKERRQ(ierr); 4247 ierr = PetscFree(adjncy);CHKERRQ(ierr); 4248 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 4249 ierr = PetscFree(vals);CHKERRQ(ierr); 4250 } else { 4251 ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 4252 } 4253 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 4254 4255 /* Partition */ 4256 ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr); 4257 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 4258 if (use_vwgt) { 4259 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 4260 v_wgt[0] = local_size; 4261 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 4262 } 4263 n_subdomains = PetscMin((PetscInt)size,n_subdomains); 4264 ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr); 4265 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 4266 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 4267 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 4268 4269 /* renumber new_ranks to avoid "holes" in new set of processors */ 4270 ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 4271 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 4272 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4273 if (!redprocs) { 4274 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 4275 } else { 4276 PetscInt idxs[1]; 4277 PetscMPIInt tag; 4278 MPI_Request *reqs; 4279 4280 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 4281 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 4282 for (i=rstart;i<rend;i++) { 4283 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr); 4284 } 4285 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr); 4286 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4287 ierr = PetscFree(reqs);CHKERRQ(ierr); 4288 ranks_send_to_idx[0] = oldranks[idxs[0]]; 4289 } 4290 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4291 /* clean up */ 4292 ierr = PetscFree(oldranks);CHKERRQ(ierr); 4293 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 4294 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 4295 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 4296 } 4297 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4298 4299 /* assemble parallel IS for sends */ 4300 i = 1; 4301 if (color) i=0; 4302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 4303 /* get back IS */ 4304 *is_sends = ranks_send_to; 4305 PetscFunctionReturn(0); 4306 } 4307 4308 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 4309 4310 #undef __FUNCT__ 4311 #define __FUNCT__ "MatISSubassemble" 4312 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[]) 4313 { 4314 Mat local_mat; 4315 IS is_sends_internal; 4316 PetscInt rows,cols,new_local_rows; 4317 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals; 4318 PetscBool ismatis,isdense,newisdense,destroy_mat; 4319 ISLocalToGlobalMapping l2gmap; 4320 PetscInt* l2gmap_indices; 4321 const PetscInt* is_indices; 4322 MatType new_local_type; 4323 /* buffers */ 4324 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 4325 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 4326 PetscInt *recv_buffer_idxs_local; 4327 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 4328 /* MPI */ 4329 MPI_Comm comm,comm_n; 4330 PetscSubcomm subcomm; 4331 PetscMPIInt n_sends,n_recvs,commsize; 4332 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 4333 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 4334 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,source_dest; 4335 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals; 4336 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals; 4337 PetscErrorCode ierr; 4338 4339 PetscFunctionBegin; 4340 /* TODO: add missing checks */ 4341 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 4342 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 4343 PetscValidLogicalCollectiveEnum(mat,reuse,5); 4344 PetscValidLogicalCollectiveInt(mat,nis,7); 4345 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 4346 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__); 4347 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4348 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 4349 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 4350 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 4351 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 4352 if (reuse == MAT_REUSE_MATRIX && *mat_n) { 4353 PetscInt mrows,mcols,mnrows,mncols; 4354 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 4355 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 4356 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 4357 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 4358 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 4359 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 4360 } 4361 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 4362 PetscValidLogicalCollectiveInt(mat,bs,0); 4363 /* prepare IS for sending if not provided */ 4364 if (!is_sends) { 4365 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 4366 ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr); 4367 } else { 4368 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 4369 is_sends_internal = is_sends; 4370 } 4371 4372 /* get comm */ 4373 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 4374 4375 /* compute number of sends */ 4376 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 4377 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 4378 4379 /* compute number of receives */ 4380 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 4381 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 4382 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 4383 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4384 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 4385 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 4386 ierr = PetscFree(iflags);CHKERRQ(ierr); 4387 4388 /* restrict comm if requested */ 4389 subcomm = 0; 4390 destroy_mat = PETSC_FALSE; 4391 if (restrict_comm) { 4392 PetscMPIInt color,subcommsize; 4393 4394 color = 0; 4395 if (restrict_full) { 4396 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 4397 } else { 4398 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 4399 } 4400 ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 4401 subcommsize = commsize - subcommsize; 4402 /* check if reuse has been requested */ 4403 if (reuse == MAT_REUSE_MATRIX) { 4404 if (*mat_n) { 4405 PetscMPIInt subcommsize2; 4406 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 4407 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 4408 comm_n = PetscObjectComm((PetscObject)*mat_n); 4409 } else { 4410 comm_n = PETSC_COMM_SELF; 4411 } 4412 } else { /* MAT_INITIAL_MATRIX */ 4413 PetscMPIInt rank; 4414 4415 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4416 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 4417 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 4418 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 4419 comm_n = PetscSubcommChild(subcomm); 4420 } 4421 /* flag to destroy *mat_n if not significative */ 4422 if (color) destroy_mat = PETSC_TRUE; 4423 } else { 4424 comm_n = comm; 4425 } 4426 4427 /* prepare send/receive buffers */ 4428 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 4429 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 4430 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 4431 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 4432 if (nis) { 4433 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 4434 } 4435 4436 /* Get data from local matrices */ 4437 if (!isdense) { 4438 SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 4439 /* TODO: See below some guidelines on how to prepare the local buffers */ 4440 /* 4441 send_buffer_vals should contain the raw values of the local matrix 4442 send_buffer_idxs should contain: 4443 - MatType_PRIVATE type 4444 - PetscInt size_of_l2gmap 4445 - PetscInt global_row_indices[size_of_l2gmap] 4446 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 4447 */ 4448 } else { 4449 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4450 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 4451 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 4452 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 4453 send_buffer_idxs[1] = i; 4454 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4455 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 4456 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 4457 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 4458 for (i=0;i<n_sends;i++) { 4459 ilengths_vals[is_indices[i]] = len*len; 4460 ilengths_idxs[is_indices[i]] = len+2; 4461 } 4462 } 4463 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 4464 /* additional is (if any) */ 4465 if (nis) { 4466 PetscMPIInt psum; 4467 PetscInt j; 4468 for (j=0,psum=0;j<nis;j++) { 4469 PetscInt plen; 4470 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4471 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 4472 psum += len+1; /* indices + lenght */ 4473 } 4474 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 4475 for (j=0,psum=0;j<nis;j++) { 4476 PetscInt plen; 4477 const PetscInt *is_array_idxs; 4478 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 4479 send_buffer_idxs_is[psum] = plen; 4480 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4481 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 4482 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 4483 psum += plen+1; /* indices + lenght */ 4484 } 4485 for (i=0;i<n_sends;i++) { 4486 ilengths_idxs_is[is_indices[i]] = psum; 4487 } 4488 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 4489 } 4490 4491 buf_size_idxs = 0; 4492 buf_size_vals = 0; 4493 buf_size_idxs_is = 0; 4494 for (i=0;i<n_recvs;i++) { 4495 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4496 buf_size_vals += (PetscInt)olengths_vals[i]; 4497 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 4498 } 4499 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 4500 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 4501 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 4502 4503 /* get new tags for clean communications */ 4504 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 4505 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 4506 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 4507 4508 /* allocate for requests */ 4509 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 4510 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 4511 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 4512 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 4513 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 4514 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 4515 4516 /* communications */ 4517 ptr_idxs = recv_buffer_idxs; 4518 ptr_vals = recv_buffer_vals; 4519 ptr_idxs_is = recv_buffer_idxs_is; 4520 for (i=0;i<n_recvs;i++) { 4521 source_dest = onodes[i]; 4522 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 4523 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 4524 ptr_idxs += olengths_idxs[i]; 4525 ptr_vals += olengths_vals[i]; 4526 if (nis) { 4527 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 4528 ptr_idxs_is += olengths_idxs_is[i]; 4529 } 4530 } 4531 for (i=0;i<n_sends;i++) { 4532 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 4533 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 4534 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 4535 if (nis) { 4536 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 4537 } 4538 } 4539 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 4540 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 4541 4542 /* assemble new l2g map */ 4543 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4544 ptr_idxs = recv_buffer_idxs; 4545 new_local_rows = 0; 4546 for (i=0;i<n_recvs;i++) { 4547 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4548 ptr_idxs += olengths_idxs[i]; 4549 } 4550 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 4551 ptr_idxs = recv_buffer_idxs; 4552 new_local_rows = 0; 4553 for (i=0;i<n_recvs;i++) { 4554 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 4555 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 4556 ptr_idxs += olengths_idxs[i]; 4557 } 4558 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 4559 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 4560 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 4561 4562 /* infer new local matrix type from received local matrices type */ 4563 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 4564 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 4565 if (n_recvs) { 4566 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 4567 ptr_idxs = recv_buffer_idxs; 4568 for (i=0;i<n_recvs;i++) { 4569 if ((PetscInt)new_local_type_private != *ptr_idxs) { 4570 new_local_type_private = MATAIJ_PRIVATE; 4571 break; 4572 } 4573 ptr_idxs += olengths_idxs[i]; 4574 } 4575 switch (new_local_type_private) { 4576 case MATDENSE_PRIVATE: 4577 if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */ 4578 new_local_type = MATSEQAIJ; 4579 bs = 1; 4580 } else { /* if I receive only 1 dense matrix */ 4581 new_local_type = MATSEQDENSE; 4582 bs = 1; 4583 } 4584 break; 4585 case MATAIJ_PRIVATE: 4586 new_local_type = MATSEQAIJ; 4587 bs = 1; 4588 break; 4589 case MATBAIJ_PRIVATE: 4590 new_local_type = MATSEQBAIJ; 4591 break; 4592 case MATSBAIJ_PRIVATE: 4593 new_local_type = MATSEQSBAIJ; 4594 break; 4595 default: 4596 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__); 4597 break; 4598 } 4599 } else { /* by default, new_local_type is seqdense */ 4600 new_local_type = MATSEQDENSE; 4601 bs = 1; 4602 } 4603 4604 /* create MATIS object if needed */ 4605 if (reuse == MAT_INITIAL_MATRIX) { 4606 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 4607 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 4608 } else { 4609 /* it also destroys the local matrices */ 4610 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 4611 } 4612 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 4613 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 4614 4615 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4616 4617 /* Global to local map of received indices */ 4618 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 4619 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 4620 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 4621 4622 /* restore attributes -> type of incoming data and its size */ 4623 buf_size_idxs = 0; 4624 for (i=0;i<n_recvs;i++) { 4625 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 4626 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 4627 buf_size_idxs += (PetscInt)olengths_idxs[i]; 4628 } 4629 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 4630 4631 /* set preallocation */ 4632 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 4633 if (!newisdense) { 4634 PetscInt *new_local_nnz=0; 4635 4636 ptr_vals = recv_buffer_vals; 4637 ptr_idxs = recv_buffer_idxs_local; 4638 if (n_recvs) { 4639 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 4640 } 4641 for (i=0;i<n_recvs;i++) { 4642 PetscInt j; 4643 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 4644 for (j=0;j<*(ptr_idxs+1);j++) { 4645 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 4646 } 4647 } else { 4648 /* TODO */ 4649 } 4650 ptr_idxs += olengths_idxs[i]; 4651 } 4652 if (new_local_nnz) { 4653 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 4654 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 4655 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 4656 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4657 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 4658 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 4659 } else { 4660 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4661 } 4662 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 4663 } else { 4664 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 4665 } 4666 4667 /* set values */ 4668 ptr_vals = recv_buffer_vals; 4669 ptr_idxs = recv_buffer_idxs_local; 4670 for (i=0;i<n_recvs;i++) { 4671 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 4672 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 4673 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 4674 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4675 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 4676 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 4677 } else { 4678 /* TODO */ 4679 } 4680 ptr_idxs += olengths_idxs[i]; 4681 ptr_vals += olengths_vals[i]; 4682 } 4683 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4684 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4685 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4686 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4687 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 4688 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 4689 4690 #if 0 4691 if (!restrict_comm) { /* check */ 4692 Vec lvec,rvec; 4693 PetscReal infty_error; 4694 4695 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 4696 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 4697 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 4698 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 4699 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 4700 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 4701 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 4702 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 4703 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 4704 } 4705 #endif 4706 4707 /* assemble new additional is (if any) */ 4708 if (nis) { 4709 PetscInt **temp_idxs,*count_is,j,psum; 4710 4711 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4712 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 4713 ptr_idxs = recv_buffer_idxs_is; 4714 psum = 0; 4715 for (i=0;i<n_recvs;i++) { 4716 for (j=0;j<nis;j++) { 4717 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4718 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 4719 psum += plen; 4720 ptr_idxs += plen+1; /* shift pointer to received data */ 4721 } 4722 } 4723 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 4724 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 4725 for (i=1;i<nis;i++) { 4726 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 4727 } 4728 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 4729 ptr_idxs = recv_buffer_idxs_is; 4730 for (i=0;i<n_recvs;i++) { 4731 for (j=0;j<nis;j++) { 4732 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 4733 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 4734 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 4735 ptr_idxs += plen+1; /* shift pointer to received data */ 4736 } 4737 } 4738 for (i=0;i<nis;i++) { 4739 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4740 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 4741 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4742 } 4743 ierr = PetscFree(count_is);CHKERRQ(ierr); 4744 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 4745 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 4746 } 4747 /* free workspace */ 4748 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 4749 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4750 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 4751 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4752 if (isdense) { 4753 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 4754 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 4755 } else { 4756 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 4757 } 4758 if (nis) { 4759 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 4760 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 4761 } 4762 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 4763 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 4764 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 4765 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 4766 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 4767 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 4768 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 4769 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 4770 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 4771 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 4772 ierr = PetscFree(onodes);CHKERRQ(ierr); 4773 if (nis) { 4774 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 4775 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 4776 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 4777 } 4778 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 4779 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 4780 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 4781 for (i=0;i<nis;i++) { 4782 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 4783 } 4784 *mat_n = NULL; 4785 } 4786 PetscFunctionReturn(0); 4787 } 4788 4789 /* temporary hack into ksp private data structure */ 4790 #include <petsc/private/kspimpl.h> 4791 4792 #undef __FUNCT__ 4793 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 4794 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 4795 { 4796 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4797 PC_IS *pcis = (PC_IS*)pc->data; 4798 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 4799 MatNullSpace CoarseNullSpace=NULL; 4800 ISLocalToGlobalMapping coarse_islg; 4801 IS coarse_is,*isarray; 4802 PetscInt i,im_active=-1,active_procs=-1; 4803 PetscInt nis,nisdofs,nisneu,nisvert; 4804 PC pc_temp; 4805 PCType coarse_pc_type; 4806 KSPType coarse_ksp_type; 4807 PetscBool multilevel_requested,multilevel_allowed; 4808 PetscBool isredundant,isbddc,isnn,coarse_reuse; 4809 Mat t_coarse_mat_is; 4810 PetscInt void_procs,ncoarse_ml,ncoarse_ds,ncoarse; 4811 PetscMPIInt all_procs; 4812 PetscBool csin_ml,csin_ds,csin,csin_type_simple,redist; 4813 PetscBool compute_vecs = PETSC_FALSE; 4814 PetscScalar *array; 4815 PetscErrorCode ierr; 4816 4817 PetscFunctionBegin; 4818 /* Assign global numbering to coarse dofs */ 4819 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 4820 PetscInt ocoarse_size; 4821 compute_vecs = PETSC_TRUE; 4822 ocoarse_size = pcbddc->coarse_size; 4823 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 4824 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 4825 /* see if we can avoid some work */ 4826 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 4827 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 4828 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 4829 PC pc; 4830 PetscBool isbddc; 4831 4832 /* temporary workaround since PCBDDC does not have a reset method so far */ 4833 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr); 4834 ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr); 4835 if (isbddc) { 4836 ierr = PCDestroy(&pc);CHKERRQ(ierr); 4837 } 4838 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 4839 coarse_reuse = PETSC_FALSE; 4840 } else { /* we can safely reuse already computed coarse matrix */ 4841 coarse_reuse = PETSC_TRUE; 4842 } 4843 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 4844 coarse_reuse = PETSC_FALSE; 4845 } 4846 /* reset any subassembling information */ 4847 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 4848 ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 4849 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 4850 coarse_reuse = PETSC_TRUE; 4851 } 4852 4853 /* count "active" (i.e. with positive local size) and "void" processes */ 4854 im_active = !!(pcis->n); 4855 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4856 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr); 4857 void_procs = all_procs-active_procs; 4858 csin_type_simple = PETSC_TRUE; 4859 redist = PETSC_FALSE; 4860 if (pcbddc->current_level && void_procs) { 4861 csin_ml = PETSC_TRUE; 4862 ncoarse_ml = void_procs; 4863 /* it has no sense to redistribute on a set of processors larger than the number of active processes */ 4864 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) { 4865 csin_ds = PETSC_TRUE; 4866 ncoarse_ds = pcbddc->redistribute_coarse; 4867 redist = PETSC_TRUE; 4868 } else { 4869 csin_ds = PETSC_TRUE; 4870 ncoarse_ds = active_procs; 4871 redist = PETSC_TRUE; 4872 } 4873 } else { 4874 csin_ml = PETSC_FALSE; 4875 ncoarse_ml = all_procs; 4876 if (void_procs) { 4877 csin_ds = PETSC_TRUE; 4878 ncoarse_ds = void_procs; 4879 csin_type_simple = PETSC_FALSE; 4880 } else { 4881 if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) { 4882 csin_ds = PETSC_TRUE; 4883 ncoarse_ds = pcbddc->redistribute_coarse; 4884 redist = PETSC_TRUE; 4885 } else { 4886 csin_ds = PETSC_FALSE; 4887 ncoarse_ds = all_procs; 4888 } 4889 } 4890 } 4891 4892 /* 4893 test if we can go multilevel: three conditions must be satisfied: 4894 - we have not exceeded the number of levels requested 4895 - we can actually subassemble the active processes 4896 - we can find a suitable number of MPI processes where we can place the subassembled problem 4897 */ 4898 multilevel_allowed = PETSC_FALSE; 4899 multilevel_requested = PETSC_FALSE; 4900 if (pcbddc->current_level < pcbddc->max_levels) { 4901 multilevel_requested = PETSC_TRUE; 4902 if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) { 4903 multilevel_allowed = PETSC_FALSE; 4904 } else { 4905 multilevel_allowed = PETSC_TRUE; 4906 } 4907 } 4908 /* determine number of process partecipating to coarse solver */ 4909 if (multilevel_allowed) { 4910 ncoarse = ncoarse_ml; 4911 csin = csin_ml; 4912 redist = PETSC_FALSE; 4913 } else { 4914 ncoarse = ncoarse_ds; 4915 csin = csin_ds; 4916 } 4917 4918 /* creates temporary l2gmap and IS for coarse indexes */ 4919 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 4920 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 4921 4922 /* creates temporary MATIS object for coarse matrix */ 4923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 4924 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4925 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 4926 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 4927 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 4928 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 4929 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4930 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4931 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 4932 4933 /* compute dofs splitting and neumann boundaries for coarse dofs */ 4934 if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */ 4935 PetscInt *tidxs,*tidxs2,nout,tsize,i; 4936 const PetscInt *idxs; 4937 ISLocalToGlobalMapping tmap; 4938 4939 /* create map between primal indices (in local representative ordering) and local primal numbering */ 4940 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 4941 /* allocate space for temporary storage */ 4942 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 4943 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 4944 /* allocate for IS array */ 4945 nisdofs = pcbddc->n_ISForDofsLocal; 4946 nisneu = !!pcbddc->NeumannBoundariesLocal; 4947 nisvert = 0; 4948 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4949 nisvert = 1; 4950 } 4951 nis = nisdofs + nisneu + nisvert; 4952 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 4953 /* dofs splitting */ 4954 for (i=0;i<nisdofs;i++) { 4955 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 4956 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 4957 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4958 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4959 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 4960 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4961 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 4962 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 4963 } 4964 /* neumann boundaries */ 4965 if (pcbddc->NeumannBoundariesLocal) { 4966 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 4967 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 4968 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4969 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4970 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 4971 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4972 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 4973 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 4974 } 4975 /* primal vertices (benign) */ 4976 if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) { 4977 ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr); 4978 ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4979 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 4980 ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr); 4981 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 4982 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr); 4983 } 4984 /* free memory */ 4985 ierr = PetscFree(tidxs);CHKERRQ(ierr); 4986 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 4987 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 4988 } else { 4989 nis = 0; 4990 nisdofs = 0; 4991 nisneu = 0; 4992 nisvert = 0; 4993 isarray = NULL; 4994 } 4995 /* destroy no longer needed map */ 4996 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 4997 4998 /* restrict on coarse candidates (if needed) */ 4999 coarse_mat_is = NULL; 5000 if (csin) { 5001 if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */ 5002 if (redist) { 5003 PetscMPIInt rank; 5004 PetscInt spc,n_spc_p1,dest[1],destsize; 5005 5006 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5007 spc = active_procs/ncoarse; 5008 n_spc_p1 = active_procs%ncoarse; 5009 if (im_active) { 5010 destsize = 1; 5011 if (rank > n_spc_p1*(spc+1)-1) { 5012 dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc; 5013 } else { 5014 dest[0] = rank/(spc+1); 5015 } 5016 } else { 5017 destsize = 0; 5018 } 5019 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5020 } else if (csin_type_simple) { 5021 PetscMPIInt rank; 5022 PetscInt issize,isidx; 5023 5024 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 5025 if (im_active) { 5026 issize = 1; 5027 isidx = (PetscInt)rank; 5028 } else { 5029 issize = 0; 5030 isidx = -1; 5031 } 5032 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5033 } else { /* get a suitable subassembling pattern from MATIS code */ 5034 ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr); 5035 } 5036 5037 /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */ 5038 if (!redist || ncoarse <= void_procs) { 5039 PetscInt ncoarse_cand,tissize,*nisindices; 5040 PetscInt *coarse_candidates; 5041 const PetscInt* tisindices; 5042 5043 /* get coarse candidates' ranks in pc communicator */ 5044 ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr); 5045 ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5046 for (i=0,ncoarse_cand=0;i<all_procs;i++) { 5047 if (!coarse_candidates[i]) { 5048 coarse_candidates[ncoarse_cand++]=i; 5049 } 5050 } 5051 if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse); 5052 5053 5054 if (pcbddc->dbg_flag) { 5055 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5056 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr); 5057 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr); 5059 for (i=0;i<ncoarse_cand;i++) { 5060 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr); 5061 } 5062 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr); 5063 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5064 } 5065 /* shift the pattern on coarse candidates */ 5066 ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr); 5067 ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5068 ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr); 5069 for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]]; 5070 ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr); 5071 ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr); 5072 ierr = PetscFree(coarse_candidates);CHKERRQ(ierr); 5073 } 5074 if (pcbddc->dbg_flag) { 5075 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr); 5077 ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr); 5078 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5079 } 5080 } 5081 /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */ 5082 if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */ 5083 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5084 } else { /* this is the last level, so use just receiving processes in subcomm */ 5085 ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr); 5086 } 5087 } else { 5088 if (pcbddc->dbg_flag) { 5089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5090 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr); 5091 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5092 } 5093 ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr); 5094 coarse_mat_is = t_coarse_mat_is; 5095 } 5096 5097 /* create local to global scatters for coarse problem */ 5098 if (compute_vecs) { 5099 PetscInt lrows; 5100 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 5101 if (coarse_mat_is) { 5102 ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr); 5103 } else { 5104 lrows = 0; 5105 } 5106 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 5107 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 5108 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 5109 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5110 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 5111 } 5112 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 5113 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 5114 5115 /* set defaults for coarse KSP and PC */ 5116 if (multilevel_allowed) { 5117 coarse_ksp_type = KSPRICHARDSON; 5118 coarse_pc_type = PCBDDC; 5119 } else { 5120 coarse_ksp_type = KSPPREONLY; 5121 coarse_pc_type = PCREDUNDANT; 5122 } 5123 5124 /* print some info if requested */ 5125 if (pcbddc->dbg_flag) { 5126 if (!multilevel_allowed) { 5127 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5128 if (multilevel_requested) { 5129 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5130 } else if (pcbddc->max_levels) { 5131 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 5132 } 5133 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5134 } 5135 } 5136 5137 /* create the coarse KSP object only once with defaults */ 5138 if (coarse_mat_is) { 5139 MatReuse coarse_mat_reuse; 5140 PetscViewer dbg_viewer = NULL; 5141 if (pcbddc->dbg_flag) { 5142 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is)); 5143 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5144 } 5145 if (!pcbddc->coarse_ksp) { 5146 char prefix[256],str_level[16]; 5147 size_t len; 5148 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr); 5149 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5150 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 5151 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 5152 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr); 5153 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 5154 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 5155 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5156 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5157 /* prefix */ 5158 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 5159 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 5160 if (!pcbddc->current_level) { 5161 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5162 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 5163 } else { 5164 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5165 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5166 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5167 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5168 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 5169 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 5170 } 5171 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 5172 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5173 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 5174 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 5175 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 5176 /* allow user customization */ 5177 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 5178 } 5179 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 5180 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 5181 if (nisdofs) { 5182 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 5183 for (i=0;i<nisdofs;i++) { 5184 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 5185 } 5186 } 5187 if (nisneu) { 5188 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 5189 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 5190 } 5191 if (nisvert) { 5192 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 5193 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 5194 } 5195 5196 /* get some info after set from options */ 5197 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 5198 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 5199 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 5200 if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */ 5201 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 5202 isbddc = PETSC_FALSE; 5203 } 5204 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5205 if (isredundant) { 5206 KSP inner_ksp; 5207 PC inner_pc; 5208 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 5209 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 5210 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 5211 } 5212 5213 /* assemble coarse matrix */ 5214 if (coarse_reuse) { 5215 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5216 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 5217 coarse_mat_reuse = MAT_REUSE_MATRIX; 5218 } else { 5219 coarse_mat_reuse = MAT_INITIAL_MATRIX; 5220 } 5221 if (isbddc || isnn) { 5222 if (pcbddc->coarsening_ratio > 1) { 5223 if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */ 5224 ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 5225 if (pcbddc->dbg_flag) { 5226 ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5227 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr); 5228 ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr); 5229 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5230 } 5231 } 5232 ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr); 5233 } else { 5234 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 5235 coarse_mat = coarse_mat_is; 5236 } 5237 } else { 5238 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 5239 } 5240 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 5241 5242 /* propagate symmetry info of coarse matrix */ 5243 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 5244 if (pc->pmat->symmetric_set) { 5245 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 5246 } 5247 if (pc->pmat->hermitian_set) { 5248 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 5249 } 5250 if (pc->pmat->spd_set) { 5251 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 5252 } 5253 /* set operators */ 5254 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5255 if (pcbddc->dbg_flag) { 5256 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 5257 } 5258 } else { /* processes non partecipating to coarse solver (if any) */ 5259 coarse_mat = 0; 5260 } 5261 ierr = PetscFree(isarray);CHKERRQ(ierr); 5262 #if 0 5263 { 5264 PetscViewer viewer; 5265 char filename[256]; 5266 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 5267 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 5268 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5269 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 5270 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 5271 } 5272 #endif 5273 5274 /* Compute coarse null space (special handling by BDDC only) */ 5275 #if 0 5276 if (pcbddc->NullSpace) { 5277 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 5278 } 5279 #endif 5280 /* hack */ 5281 if (pcbddc->coarse_ksp) { 5282 Vec crhs,csol; 5283 5284 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 5285 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 5286 if (!csol) { 5287 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 5288 } 5289 if (!crhs) { 5290 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 5291 } 5292 } 5293 5294 /* compute null space for coarse solver if the benign trick has been requested */ 5295 if (pcbddc->benign_null) { 5296 5297 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 5298 for (i=0;i<pcbddc->benign_n;i++) { 5299 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5300 } 5301 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 5302 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 5303 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5304 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5305 if (coarse_mat) { 5306 Vec nullv; 5307 PetscScalar *array,*array2; 5308 PetscInt nl; 5309 5310 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 5311 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 5312 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5313 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 5314 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 5315 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 5316 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 5317 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 5318 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 5319 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 5320 } 5321 } 5322 5323 if (pcbddc->coarse_ksp) { 5324 PetscBool ispreonly; 5325 5326 if (CoarseNullSpace) { 5327 PetscBool isnull; 5328 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 5329 if (isnull) { 5330 if (isbddc && !pcbddc->benign_saddle_point) { 5331 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 5332 } else { 5333 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 5334 } 5335 } else { 5336 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5337 } 5338 } 5339 /* setup coarse ksp */ 5340 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 5341 /* Check coarse problem if in debug mode or if solving with an iterative method */ 5342 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 5343 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 5344 KSP check_ksp; 5345 KSPType check_ksp_type; 5346 PC check_pc; 5347 Vec check_vec,coarse_vec; 5348 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 5349 PetscInt its; 5350 PetscBool compute_eigs; 5351 PetscReal *eigs_r,*eigs_c; 5352 PetscInt neigs; 5353 const char *prefix; 5354 5355 /* Create ksp object suitable for estimation of extreme eigenvalues */ 5356 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 5357 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 5358 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 5359 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 5360 if (ispreonly) { 5361 check_ksp_type = KSPPREONLY; 5362 compute_eigs = PETSC_FALSE; 5363 } else { 5364 check_ksp_type = KSPGMRES; 5365 compute_eigs = PETSC_TRUE; 5366 } 5367 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 5368 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 5369 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 5370 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 5371 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 5372 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 5373 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 5374 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 5375 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 5376 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 5377 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 5378 /* create random vec */ 5379 ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr); 5380 ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr); 5381 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 5382 if (CoarseNullSpace) { 5383 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 5384 } 5385 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5386 /* solve coarse problem */ 5387 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 5388 if (CoarseNullSpace) { 5389 ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr); 5390 } 5391 /* set eigenvalue estimation if preonly has not been requested */ 5392 if (compute_eigs) { 5393 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 5394 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 5395 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 5396 lambda_max = eigs_r[neigs-1]; 5397 lambda_min = eigs_r[0]; 5398 if (pcbddc->use_coarse_estimates) { 5399 if (lambda_max>lambda_min) { 5400 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr); 5401 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 5402 } 5403 } 5404 } 5405 5406 /* check coarse problem residual error */ 5407 if (pcbddc->dbg_flag) { 5408 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 5409 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5410 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 5411 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 5412 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 5413 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 5414 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 5415 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 5416 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 5417 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 5418 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 5419 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 5420 if (CoarseNullSpace) { 5421 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 5422 } 5423 if (compute_eigs) { 5424 PetscReal lambda_max_s,lambda_min_s; 5425 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 5426 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 5427 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 5428 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 5429 for (i=0;i<neigs;i++) { 5430 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 5431 } 5432 } 5433 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 5434 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 5435 } 5436 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 5437 if (compute_eigs) { 5438 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 5439 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 5440 } 5441 } 5442 } 5443 /* print additional info */ 5444 if (pcbddc->dbg_flag) { 5445 /* waits until all processes reaches this point */ 5446 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 5447 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 5448 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5449 } 5450 5451 /* free memory */ 5452 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 5453 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 5454 PetscFunctionReturn(0); 5455 } 5456 5457 #undef __FUNCT__ 5458 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 5459 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 5460 { 5461 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5462 PC_IS* pcis = (PC_IS*)pc->data; 5463 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5464 IS subset,subset_mult,subset_n; 5465 PetscInt local_size,coarse_size=0; 5466 PetscInt *local_primal_indices=NULL; 5467 const PetscInt *t_local_primal_indices; 5468 PetscErrorCode ierr; 5469 5470 PetscFunctionBegin; 5471 /* Compute global number of coarse dofs */ 5472 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) { 5473 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 5474 } 5475 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 5476 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 5477 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5478 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 5479 ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 5480 ierr = ISDestroy(&subset);CHKERRQ(ierr); 5481 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 5482 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 5483 if (local_size != pcbddc->local_primal_size) { 5484 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size); 5485 } 5486 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 5487 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5488 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 5489 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 5490 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 5491 5492 /* check numbering */ 5493 if (pcbddc->dbg_flag) { 5494 PetscScalar coarsesum,*array,*array2; 5495 PetscInt i; 5496 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 5497 5498 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5499 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5500 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 5501 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5502 /* counter */ 5503 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5504 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 5505 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5506 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5507 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5508 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5509 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 5510 for (i=0;i<pcbddc->local_primal_size;i++) { 5511 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5512 } 5513 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5514 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5515 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5516 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5517 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5518 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5519 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5520 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5521 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5522 for (i=0;i<pcis->n;i++) { 5523 if (array[i] != 0.0 && array[i] != array2[i]) { 5524 PetscInt owned = (PetscInt)PetscRealPart(array[i]); 5525 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 5526 set_error = PETSC_TRUE; 5527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr); 5528 } 5529 } 5530 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 5531 ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 5532 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5533 for (i=0;i<pcis->n;i++) { 5534 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 5535 } 5536 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5537 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 5538 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5539 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5540 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 5541 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 5542 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 5543 PetscInt *gidxs; 5544 5545 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 5546 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 5547 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 5548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5549 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 5550 for (i=0;i<pcbddc->local_primal_size;i++) { 5551 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 5552 } 5553 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5554 ierr = PetscFree(gidxs);CHKERRQ(ierr); 5555 } 5556 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5557 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5558 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 5559 } 5560 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 5561 /* get back data */ 5562 *coarse_size_n = coarse_size; 5563 *local_primal_indices_n = local_primal_indices; 5564 PetscFunctionReturn(0); 5565 } 5566 5567 #undef __FUNCT__ 5568 #define __FUNCT__ "PCBDDCGlobalToLocal" 5569 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 5570 { 5571 IS localis_t; 5572 PetscInt i,lsize,*idxs,n; 5573 PetscScalar *vals; 5574 PetscErrorCode ierr; 5575 5576 PetscFunctionBegin; 5577 /* get indices in local ordering exploiting local to global map */ 5578 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 5579 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 5580 for (i=0;i<lsize;i++) vals[i] = 1.0; 5581 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5582 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 5583 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 5584 if (idxs) { /* multilevel guard */ 5585 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 5586 } 5587 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 5588 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 5589 ierr = PetscFree(vals);CHKERRQ(ierr); 5590 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 5591 /* now compute set in local ordering */ 5592 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5593 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5594 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5595 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 5596 for (i=0,lsize=0;i<n;i++) { 5597 if (PetscRealPart(vals[i]) > 0.5) { 5598 lsize++; 5599 } 5600 } 5601 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 5602 for (i=0,lsize=0;i<n;i++) { 5603 if (PetscRealPart(vals[i]) > 0.5) { 5604 idxs[lsize++] = i; 5605 } 5606 } 5607 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 5608 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 5609 *localis = localis_t; 5610 PetscFunctionReturn(0); 5611 } 5612 5613 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */ 5614 #undef __FUNCT__ 5615 #define __FUNCT__ "PCBDDCMatMult_Private" 5616 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y) 5617 { 5618 PCBDDCChange_ctx change_ctx; 5619 PetscErrorCode ierr; 5620 5621 PetscFunctionBegin; 5622 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5623 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5624 ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5625 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5626 PetscFunctionReturn(0); 5627 } 5628 5629 #undef __FUNCT__ 5630 #define __FUNCT__ "PCBDDCMatMultTranspose_Private" 5631 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y) 5632 { 5633 PCBDDCChange_ctx change_ctx; 5634 PetscErrorCode ierr; 5635 5636 PetscFunctionBegin; 5637 ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr); 5638 ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr); 5639 ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr); 5640 ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr); 5641 PetscFunctionReturn(0); 5642 } 5643 5644 #undef __FUNCT__ 5645 #define __FUNCT__ "PCBDDCSetUpSubSchurs" 5646 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 5647 { 5648 PC_IS *pcis=(PC_IS*)pc->data; 5649 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5650 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5651 Mat S_j; 5652 PetscInt *used_xadj,*used_adjncy; 5653 PetscBool free_used_adj; 5654 PetscErrorCode ierr; 5655 5656 PetscFunctionBegin; 5657 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 5658 free_used_adj = PETSC_FALSE; 5659 if (pcbddc->sub_schurs_layers == -1) { 5660 used_xadj = NULL; 5661 used_adjncy = NULL; 5662 } else { 5663 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 5664 used_xadj = pcbddc->mat_graph->xadj; 5665 used_adjncy = pcbddc->mat_graph->adjncy; 5666 } else if (pcbddc->computed_rowadj) { 5667 used_xadj = pcbddc->mat_graph->xadj; 5668 used_adjncy = pcbddc->mat_graph->adjncy; 5669 } else { 5670 PetscBool flg_row=PETSC_FALSE; 5671 const PetscInt *xadj,*adjncy; 5672 PetscInt nvtxs; 5673 5674 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5675 if (flg_row) { 5676 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 5677 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 5678 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 5679 free_used_adj = PETSC_TRUE; 5680 } else { 5681 pcbddc->sub_schurs_layers = -1; 5682 used_xadj = NULL; 5683 used_adjncy = NULL; 5684 } 5685 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 5686 } 5687 } 5688 5689 /* setup sub_schurs data */ 5690 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5691 if (!sub_schurs->use_mumps) { 5692 /* pcbddc->ksp_D up to date only if not using MUMPS */ 5693 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5694 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr); 5695 } else { 5696 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 5697 PetscBool isseqaij; 5698 PetscInt i; 5699 if (!pcbddc->use_vertices && reuse_solvers) { 5700 PetscInt n_vertices; 5701 5702 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5703 reuse_solvers = (PetscBool)!n_vertices; 5704 } 5705 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5706 if (!isseqaij) { 5707 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5708 if (matis->A == pcbddc->local_mat) { 5709 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5710 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5711 } else { 5712 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5713 } 5714 } 5715 if (!pcbddc->benign_n) { 5716 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,0,NULL,NULL);CHKERRQ(ierr); 5717 } else { 5718 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5719 ierr = PCBDDCSubSchursSetUp(sub_schurs,matis->A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 5720 } 5721 /* we no longer need these index sets */ 5722 for (i=0;i<pcbddc->benign_n;i++) { 5723 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 5724 } 5725 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 5726 } 5727 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5728 5729 /* free adjacency */ 5730 if (free_used_adj) { 5731 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 5732 } 5733 PetscFunctionReturn(0); 5734 } 5735 5736 #undef __FUNCT__ 5737 #define __FUNCT__ "PCBDDCInitSubSchurs" 5738 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 5739 { 5740 PC_IS *pcis=(PC_IS*)pc->data; 5741 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5742 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 5743 PCBDDCGraph graph; 5744 PetscErrorCode ierr; 5745 5746 PetscFunctionBegin; 5747 /* attach interface graph for determining subsets */ 5748 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 5749 IS verticesIS,verticescomm; 5750 PetscInt vsize,*idxs; 5751 5752 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 5753 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 5754 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5755 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 5756 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 5757 ierr = ISDestroy(&verticesIS);CHKERRQ(ierr); 5758 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 5759 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr); 5760 ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 5761 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 5762 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 5763 /* 5764 if (pcbddc->dbg_flag) { 5765 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5766 } 5767 */ 5768 } else { 5769 graph = pcbddc->mat_graph; 5770 } 5771 5772 /* sub_schurs init */ 5773 ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr); 5774 5775 /* free graph struct */ 5776 if (pcbddc->sub_schurs_rebuild) { 5777 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 5778 } 5779 PetscFunctionReturn(0); 5780 } 5781 5782 #undef __FUNCT__ 5783 #define __FUNCT__ "PCBDDCCheckOperator" 5784 PetscErrorCode PCBDDCCheckOperator(PC pc) 5785 { 5786 PC_IS *pcis=(PC_IS*)pc->data; 5787 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 5788 PetscErrorCode ierr; 5789 5790 PetscFunctionBegin; 5791 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 5792 IS zerodiag = NULL; 5793 Mat S_j,B0_B=NULL; 5794 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 5795 PetscScalar *p0_check,*array,*array2; 5796 PetscReal norm; 5797 PetscInt i; 5798 5799 /* B0 and B0_B */ 5800 if (zerodiag) { 5801 IS dummy; 5802 5803 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 5804 ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 5805 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 5806 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 5807 } 5808 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 5809 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 5810 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 5811 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5812 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5813 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5814 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5815 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 5816 /* S_j */ 5817 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 5818 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 5819 5820 /* mimic vector in \widetilde{W}_\Gamma */ 5821 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 5822 /* continuous in primal space */ 5823 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 5824 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5825 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5826 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5827 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 5828 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 5829 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5830 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5831 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5832 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5833 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5834 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5835 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 5836 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 5837 5838 /* assemble rhs for coarse problem */ 5839 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 5840 /* local with Schur */ 5841 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 5842 if (zerodiag) { 5843 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5844 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 5845 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5846 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5847 } 5848 /* sum on primal nodes the local contributions */ 5849 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5850 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5851 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5852 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5853 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 5854 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 5855 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 5856 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 5857 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5858 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5859 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5860 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5861 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5862 /* scale primal nodes (BDDC sums contibutions) */ 5863 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 5864 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 5865 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5866 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 5867 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 5868 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5869 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5870 /* global: \widetilde{B0}_B w_\Gamma */ 5871 if (zerodiag) { 5872 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 5873 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 5874 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 5875 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 5876 } 5877 /* BDDC */ 5878 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 5879 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 5880 5881 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 5882 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 5883 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 5884 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 5885 for (i=0;i<pcbddc->benign_n;i++) { 5886 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 5887 } 5888 ierr = PetscFree(p0_check);CHKERRQ(ierr); 5889 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 5890 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 5891 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 5892 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 5893 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 5894 } 5895 PetscFunctionReturn(0); 5896 } 5897