1 2 /*************************************xxt.c************************************ 3 Module Name: xxt 4 Module Info: 5 6 author: Henry M. Tufo III 7 e-mail: hmt@asci.uchicago.edu 8 contact: 9 +--------------------------------+--------------------------------+ 10 |MCS Division - Building 221 |Department of Computer Science | 11 |Argonne National Laboratory |Ryerson 152 | 12 |9700 S. Cass Avenue |The University of Chicago | 13 |Argonne, IL 60439 |Chicago, IL 60637 | 14 |(630) 252-5354/5986 ph/fx |(773) 702-6019/8487 ph/fx | 15 +--------------------------------+--------------------------------+ 16 17 Last Modification: 3.20.01 18 **************************************xxt.c***********************************/ 19 20 21 /*************************************xxt.c************************************ 22 NOTES ON USAGE: 23 24 **************************************xxt.c***********************************/ 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <limits.h> 28 #include <float.h> 29 #include <math.h> 30 31 #include "petsc.h" 32 33 #include "const.h" 34 #include "types.h" 35 #include "comm.h" 36 #include "error.h" 37 #include "ivec.h" 38 #include "queue.h" 39 #include "gs.h" 40 #include "xxt.h" 41 42 #define LEFT -1 43 #define RIGHT 1 44 #define BOTH 0 45 #define MAX_FORTRAN_HANDLES 10 46 47 typedef struct xxt_solver_info { 48 int n, m, n_global, m_global; 49 int nnz, max_nnz, msg_buf_sz; 50 int *nsep, *lnsep, *fo, nfo, *stages; 51 int *col_sz, *col_indices; 52 PetscScalar **col_vals, *x, *solve_uu, *solve_w; 53 int nsolves; 54 PetscScalar tot_solve_time; 55 } xxt_info; 56 57 typedef struct matvec_info { 58 int n, m, n_global, m_global; 59 int *local2global; 60 gs_ADT gs_handle; 61 PetscErrorCode (*matvec)(struct matvec_info*,PetscScalar*,PetscScalar*); 62 void *grid_data; 63 } mv_info; 64 65 struct xxt_CDT{ 66 int id; 67 int ns; 68 int level; 69 xxt_info *info; 70 mv_info *mvi; 71 }; 72 73 static int n_xxt=0; 74 static int n_xxt_handles=0; 75 76 /* prototypes */ 77 static void do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *rhs); 78 static void check_init(void); 79 static void check_handle(xxt_ADT xxt_handle); 80 static void det_separators(xxt_ADT xxt_handle); 81 static void do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u); 82 static int xxt_generate(xxt_ADT xxt_handle); 83 static int do_xxt_factor(xxt_ADT xxt_handle); 84 static mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data); 85 86 87 88 /*************************************xxt.c************************************ 89 Function: XXT_new() 90 91 Input : 92 Output: 93 Return: 94 Description: 95 **************************************xxt.c***********************************/ 96 xxt_ADT 97 XXT_new(void) 98 { 99 xxt_ADT xxt_handle; 100 101 102 103 /* rolling count on n_xxt ... pot. problem here */ 104 n_xxt_handles++; 105 xxt_handle = (xxt_ADT)malloc(sizeof(struct xxt_CDT)); 106 xxt_handle->id = ++n_xxt; 107 xxt_handle->info = NULL; xxt_handle->mvi = NULL; 108 109 return(xxt_handle); 110 } 111 112 113 /*************************************xxt.c************************************ 114 Function: XXT_factor() 115 116 Input : 117 Output: 118 Return: 119 Description: 120 **************************************xxt.c***********************************/ 121 int 122 XXT_factor(xxt_ADT xxt_handle, /* prev. allocated xxt handle */ 123 int *local2global, /* global column mapping */ 124 int n, /* local num rows */ 125 int m, /* local num cols */ 126 void *matvec, /* b_loc=A_local.x_loc */ 127 void *grid_data /* grid data for matvec */ 128 ) 129 { 130 check_init(); 131 check_handle(xxt_handle); 132 133 /* only 2^k for now and all nodes participating */ 134 if ((1<<(xxt_handle->level=i_log2_num_nodes))!=num_nodes) 135 {error_msg_fatal("only 2^k for now and MPI_COMM_WORLD!!! %d != %d\n",1<<i_log2_num_nodes,num_nodes);} 136 137 /* space for X info */ 138 xxt_handle->info = (xxt_info*)malloc(sizeof(xxt_info)); 139 140 /* set up matvec handles */ 141 xxt_handle->mvi = set_mvi(local2global, n, m, matvec, grid_data); 142 143 /* matrix is assumed to be of full rank */ 144 /* LATER we can reset to indicate rank def. */ 145 xxt_handle->ns=0; 146 147 /* determine separators and generate firing order - NB xxt info set here */ 148 det_separators(xxt_handle); 149 150 return(do_xxt_factor(xxt_handle)); 151 } 152 153 154 /*************************************xxt.c************************************ 155 Function: XXT_solve 156 157 Input : 158 Output: 159 Return: 160 Description: 161 **************************************xxt.c***********************************/ 162 int 163 XXT_solve(xxt_ADT xxt_handle, double *x, double *b) 164 { 165 166 check_init(); 167 check_handle(xxt_handle); 168 169 /* need to copy b into x? */ 170 if (b) 171 {rvec_copy(x,b,xxt_handle->mvi->n);} 172 do_xxt_solve(xxt_handle,x); 173 174 return(0); 175 } 176 177 178 /*************************************xxt.c************************************ 179 Function: XXT_free() 180 181 Input : 182 Output: 183 Return: 184 Description: 185 **************************************xxt.c***********************************/ 186 int 187 XXT_free(xxt_ADT xxt_handle) 188 { 189 190 check_init(); 191 check_handle(xxt_handle); 192 n_xxt_handles--; 193 194 free(xxt_handle->info->nsep); 195 free(xxt_handle->info->lnsep); 196 free(xxt_handle->info->fo); 197 free(xxt_handle->info->stages); 198 free(xxt_handle->info->solve_uu); 199 free(xxt_handle->info->solve_w); 200 free(xxt_handle->info->x); 201 free(xxt_handle->info->col_vals); 202 free(xxt_handle->info->col_sz); 203 free(xxt_handle->info->col_indices); 204 free(xxt_handle->info); 205 free(xxt_handle->mvi->local2global); 206 gs_free(xxt_handle->mvi->gs_handle); 207 free(xxt_handle->mvi); 208 free(xxt_handle); 209 210 211 212 /* if the check fails we nuke */ 213 /* if NULL pointer passed to free we nuke */ 214 /* if the calls to free fail that's not my problem */ 215 return(0); 216 } 217 218 219 220 /*************************************xxt.c************************************ 221 Function: 222 223 Input : 224 Output: 225 Return: 226 Description: 227 **************************************xxt.c***********************************/ 228 int 229 XXT_stats(xxt_ADT xxt_handle) 230 { 231 int op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD}; 232 int fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD}; 233 int vals[9], work[9]; 234 PetscScalar fvals[3], fwork[3]; 235 236 237 238 check_init(); 239 check_handle(xxt_handle); 240 241 /* if factorization not done there are no stats */ 242 if (!xxt_handle->info||!xxt_handle->mvi) 243 { 244 if (!my_id) 245 {printf("XXT_stats() :: no stats available!\n");} 246 return 1; 247 } 248 249 vals[0]=vals[1]=vals[2]=xxt_handle->info->nnz; 250 vals[3]=vals[4]=vals[5]=xxt_handle->mvi->n; 251 vals[6]=vals[7]=vals[8]=xxt_handle->info->msg_buf_sz; 252 giop(vals,work,sizeof(op)/sizeof(op[0])-1,op); 253 254 fvals[0]=fvals[1]=fvals[2] 255 =xxt_handle->info->tot_solve_time/xxt_handle->info->nsolves++; 256 grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop); 257 258 if (!my_id) 259 { 260 printf("%d :: min xxt_nnz=%d\n",my_id,vals[0]); 261 printf("%d :: max xxt_nnz=%d\n",my_id,vals[1]); 262 printf("%d :: avg xxt_nnz=%g\n",my_id,1.0*vals[2]/num_nodes); 263 printf("%d :: tot xxt_nnz=%d\n",my_id,vals[2]); 264 printf("%d :: xxt C(2d) =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.5))); 265 printf("%d :: xxt C(3d) =%g\n",my_id,vals[2]/(pow(1.0*vals[5],1.6667))); 266 printf("%d :: min xxt_n =%d\n",my_id,vals[3]); 267 printf("%d :: max xxt_n =%d\n",my_id,vals[4]); 268 printf("%d :: avg xxt_n =%g\n",my_id,1.0*vals[5]/num_nodes); 269 printf("%d :: tot xxt_n =%d\n",my_id,vals[5]); 270 printf("%d :: min xxt_buf=%d\n",my_id,vals[6]); 271 printf("%d :: max xxt_buf=%d\n",my_id,vals[7]); 272 printf("%d :: avg xxt_buf=%g\n",my_id,1.0*vals[8]/num_nodes); 273 printf("%d :: min xxt_slv=%g\n",my_id,fvals[0]); 274 printf("%d :: max xxt_slv=%g\n",my_id,fvals[1]); 275 printf("%d :: avg xxt_slv=%g\n",my_id,fvals[2]/num_nodes); 276 } 277 278 return(0); 279 } 280 281 282 /*************************************xxt.c************************************ 283 Function: do_xxt_factor 284 285 Input : 286 Output: 287 Return: 288 Description: get A_local, local portion of global coarse matrix which 289 is a row dist. nxm matrix w/ n<m. 290 o my_ml holds address of ML struct associated w/A_local and coarse grid 291 o local2global holds global number of column i (i=0,...,m-1) 292 o local2global holds global number of row i (i=0,...,n-1) 293 o mylocmatvec performs A_local . vec_local (note that gs is performed using 294 gs_init/gop). 295 296 mylocmatvec = my_ml->Amat[grid_tag].matvec->external; 297 mylocmatvec (void :: void *data, double *in, double *out) 298 **************************************xxt.c***********************************/ 299 static 300 int 301 do_xxt_factor(xxt_ADT xxt_handle) 302 { 303 int flag; 304 305 306 flag=xxt_generate(xxt_handle); 307 308 return(flag); 309 } 310 311 312 /*************************************xxt.c************************************ 313 Function: 314 315 Input : 316 Output: 317 Return: 318 Description: 319 **************************************xxt.c***********************************/ 320 static 321 int 322 xxt_generate(xxt_ADT xxt_handle) 323 { 324 int i,j,k,idex; 325 int dim, col; 326 PetscScalar *u, *uu, *v, *z, *w, alpha, alpha_w; 327 int *segs; 328 int op[] = {GL_ADD,0}; 329 int off, len; 330 PetscScalar *x_ptr; 331 int *iptr, flag; 332 int start=0, end, work; 333 int op2[] = {GL_MIN,0}; 334 gs_ADT gs_handle; 335 int *nsep, *lnsep, *fo; 336 int a_n=xxt_handle->mvi->n; 337 int a_m=xxt_handle->mvi->m; 338 int *a_local2global=xxt_handle->mvi->local2global; 339 int level; 340 int xxt_nnz=0, xxt_max_nnz=0; 341 int n, m; 342 int *col_sz, *col_indices, *stages; 343 PetscScalar **col_vals, *x; 344 int n_global; 345 int xxt_zero_nnz=0; 346 int xxt_zero_nnz_0=0; 347 PetscBLASInt i1 = 1; 348 PetscScalar dm1 = -1.0; 349 350 n=xxt_handle->mvi->n; 351 nsep=xxt_handle->info->nsep; 352 lnsep=xxt_handle->info->lnsep; 353 fo=xxt_handle->info->fo; 354 end=lnsep[0]; 355 level=xxt_handle->level; 356 gs_handle=xxt_handle->mvi->gs_handle; 357 358 /* is there a null space? */ 359 /* LATER add in ability to detect null space by checking alpha */ 360 for (i=0, j=0; i<=level; i++) 361 {j+=nsep[i];} 362 363 m = j-xxt_handle->ns; 364 if (m!=j) 365 {printf("xxt_generate() :: null space exists %d %d %d\n",m,j,xxt_handle->ns);} 366 367 /* get and initialize storage for x local */ 368 /* note that x local is nxm and stored by columns */ 369 col_sz = (int*) malloc(m*sizeof(PetscInt)); 370 col_indices = (int*) malloc((2*m+1)*sizeof(int)); 371 col_vals = (PetscScalar **) malloc(m*sizeof(PetscScalar *)); 372 for (i=j=0; i<m; i++, j+=2) 373 { 374 col_indices[j]=col_indices[j+1]=col_sz[i]=-1; 375 col_vals[i] = NULL; 376 } 377 col_indices[j]=-1; 378 379 /* size of separators for each sub-hc working from bottom of tree to top */ 380 /* this looks like nsep[]=segments */ 381 stages = (int*) malloc((level+1)*sizeof(PetscInt)); 382 segs = (int*) malloc((level+1)*sizeof(PetscInt)); 383 ivec_zero(stages,level+1); 384 ivec_copy(segs,nsep,level+1); 385 for (i=0; i<level; i++) 386 {segs[i+1] += segs[i];} 387 stages[0] = segs[0]; 388 389 /* temporary vectors */ 390 u = (PetscScalar *) malloc(n*sizeof(PetscScalar)); 391 z = (PetscScalar *) malloc(n*sizeof(PetscScalar)); 392 v = (PetscScalar *) malloc(a_m*sizeof(PetscScalar)); 393 uu = (PetscScalar *) malloc(m*sizeof(PetscScalar)); 394 w = (PetscScalar *) malloc(m*sizeof(PetscScalar)); 395 396 /* extra nnz due to replication of vertices across separators */ 397 for (i=1, j=0; i<=level; i++) 398 {j+=nsep[i];} 399 400 /* storage for sparse x values */ 401 n_global = xxt_handle->info->n_global; 402 xxt_max_nnz = (int)(2.5*pow(1.0*n_global,1.6667) + j*n/2)/num_nodes; 403 x = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar)); 404 xxt_nnz = 0; 405 406 /* LATER - can embed next sep to fire in gs */ 407 /* time to make the donuts - generate X factor */ 408 for (dim=i=j=0;i<m;i++) 409 { 410 /* time to move to the next level? */ 411 while (i==segs[dim]) 412 { 413 #ifdef SAFE 414 if (dim==level) 415 {error_msg_fatal("dim about to exceed level\n"); break;} 416 #endif 417 418 stages[dim++]=i; 419 end+=lnsep[dim]; 420 } 421 stages[dim]=i; 422 423 /* which column are we firing? */ 424 /* i.e. set v_l */ 425 /* use new seps and do global min across hc to determine which one to fire */ 426 (start<end) ? (col=fo[start]) : (col=INT_MAX); 427 giop_hc(&col,&work,1,op2,dim); 428 429 /* shouldn't need this */ 430 if (col==INT_MAX) 431 { 432 error_msg_warning("hey ... col==INT_MAX??\n"); 433 continue; 434 } 435 436 /* do I own it? I should */ 437 rvec_zero(v ,a_m); 438 if (col==fo[start]) 439 { 440 start++; 441 idex=ivec_linear_search(col, a_local2global, a_n); 442 if (idex!=-1) 443 {v[idex] = 1.0; j++;} 444 else 445 {error_msg_fatal("NOT FOUND!\n");} 446 } 447 else 448 { 449 idex=ivec_linear_search(col, a_local2global, a_m); 450 if (idex!=-1) 451 {v[idex] = 1.0;} 452 } 453 454 /* perform u = A.v_l */ 455 rvec_zero(u,n); 456 do_matvec(xxt_handle->mvi,v,u); 457 458 /* uu = X^T.u_l (local portion) */ 459 /* technically only need to zero out first i entries */ 460 /* later turn this into an XXT_solve call ? */ 461 rvec_zero(uu,m); 462 x_ptr=x; 463 iptr = col_indices; 464 for (k=0; k<i; k++) 465 { 466 off = *iptr++; 467 len = *iptr++; 468 469 uu[k] = BLdot_(&len,u+off,&i1,x_ptr,&i1); 470 x_ptr+=len; 471 } 472 473 474 /* uu = X^T.u_l (comm portion) */ 475 ssgl_radd (uu, w, dim, stages); 476 477 /* z = X.uu */ 478 rvec_zero(z,n); 479 x_ptr=x; 480 iptr = col_indices; 481 for (k=0; k<i; k++) 482 { 483 off = *iptr++; 484 len = *iptr++; 485 486 BLaxpy_(&len,&uu[k],x_ptr,&i1,z+off,&i1); 487 x_ptr+=len; 488 } 489 490 /* compute v_l = v_l - z */ 491 rvec_zero(v+a_n,a_m-a_n); 492 BLaxpy_(&n,&dm1,z,&i1,v,&i1); 493 494 /* compute u_l = A.v_l */ 495 if (a_n!=a_m) 496 {gs_gop_hc(gs_handle,v,"+\0",dim);} 497 rvec_zero(u,n); 498 do_matvec(xxt_handle->mvi,v,u); 499 500 /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */ 501 alpha = BLdot_(&n,u,&i1,v,&i1); 502 /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */ 503 grop_hc(&alpha, &alpha_w, 1, op, dim); 504 505 alpha = (PetscScalar) sqrt((double)alpha); 506 507 /* check for small alpha */ 508 /* LATER use this to detect and determine null space */ 509 if (fabs(alpha)<1.0e-14) 510 {error_msg_fatal("bad alpha! %g\n",alpha);} 511 512 /* compute v_l = v_l/sqrt(alpha) */ 513 rvec_scale(v,1.0/alpha,n); 514 515 /* add newly generated column, v_l, to X */ 516 flag = 1; 517 off=len=0; 518 for (k=0; k<n; k++) 519 { 520 if (v[k]!=0.0) 521 { 522 len=k; 523 if (flag) 524 {off=k; flag=0;} 525 } 526 } 527 528 len -= (off-1); 529 530 if (len>0) 531 { 532 if ((xxt_nnz+len)>xxt_max_nnz) 533 { 534 error_msg_warning("increasing space for X by 2x!\n"); 535 xxt_max_nnz *= 2; 536 x_ptr = (PetscScalar *) malloc(xxt_max_nnz*sizeof(PetscScalar)); 537 rvec_copy(x_ptr,x,xxt_nnz); 538 free(x); 539 x = x_ptr; 540 x_ptr+=xxt_nnz; 541 } 542 xxt_nnz += len; 543 rvec_copy(x_ptr,v+off,len); 544 545 /* keep track of number of zeros */ 546 if (dim) 547 { 548 for (k=0; k<len; k++) 549 { 550 if (x_ptr[k]==0.0) 551 {xxt_zero_nnz++;} 552 } 553 } 554 else 555 { 556 for (k=0; k<len; k++) 557 { 558 if (x_ptr[k]==0.0) 559 {xxt_zero_nnz_0++;} 560 } 561 } 562 col_indices[2*i] = off; 563 col_sz[i] = col_indices[2*i+1] = len; 564 col_vals[i] = x_ptr; 565 } 566 else 567 { 568 col_indices[2*i] = 0; 569 col_sz[i] = col_indices[2*i+1] = 0; 570 col_vals[i] = x_ptr; 571 } 572 } 573 574 /* close off stages for execution phase */ 575 while (dim!=level) 576 { 577 stages[dim++]=i; 578 error_msg_warning("disconnected!!! dim(%d)!=level(%d)\n",dim,level); 579 } 580 stages[dim]=i; 581 582 xxt_handle->info->n=xxt_handle->mvi->n; 583 xxt_handle->info->m=m; 584 xxt_handle->info->nnz=xxt_nnz; 585 xxt_handle->info->max_nnz=xxt_max_nnz; 586 xxt_handle->info->msg_buf_sz=stages[level]-stages[0]; 587 xxt_handle->info->solve_uu = (PetscScalar *) malloc(m*sizeof(PetscScalar)); 588 xxt_handle->info->solve_w = (PetscScalar *) malloc(m*sizeof(PetscScalar)); 589 xxt_handle->info->x=x; 590 xxt_handle->info->col_vals=col_vals; 591 xxt_handle->info->col_sz=col_sz; 592 xxt_handle->info->col_indices=col_indices; 593 xxt_handle->info->stages=stages; 594 xxt_handle->info->nsolves=0; 595 xxt_handle->info->tot_solve_time=0.0; 596 597 free(segs); 598 free(u); 599 free(v); 600 free(uu); 601 free(z); 602 free(w); 603 604 return(0); 605 } 606 607 608 /*************************************xxt.c************************************ 609 Function: 610 611 Input : 612 Output: 613 Return: 614 Description: 615 **************************************xxt.c***********************************/ 616 static 617 void 618 do_xxt_solve(xxt_ADT xxt_handle, PetscScalar *uc) 619 { 620 int off, len, *iptr; 621 int level =xxt_handle->level; 622 int n =xxt_handle->info->n; 623 int m =xxt_handle->info->m; 624 int *stages =xxt_handle->info->stages; 625 int *col_indices=xxt_handle->info->col_indices; 626 PetscScalar *x_ptr, *uu_ptr; 627 PetscScalar *solve_uu=xxt_handle->info->solve_uu; 628 PetscScalar *solve_w =xxt_handle->info->solve_w; 629 PetscScalar *x =xxt_handle->info->x; 630 PetscBLASInt i1 = 1; 631 632 uu_ptr=solve_uu; 633 rvec_zero(uu_ptr,m); 634 635 /* x = X.Y^T.b */ 636 /* uu = Y^T.b */ 637 for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len) 638 { 639 off=*iptr++; len=*iptr++; 640 *uu_ptr++ = BLdot_(&len,uc+off,&i1,x_ptr,&i1); 641 } 642 643 /* comunication of beta */ 644 uu_ptr=solve_uu; 645 if (level) {ssgl_radd(uu_ptr, solve_w, level, stages);} 646 647 rvec_zero(uc,n); 648 649 /* x = X.uu */ 650 for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len) 651 { 652 off=*iptr++; len=*iptr++; 653 BLaxpy_(&len,uu_ptr++,x_ptr,&i1,uc+off,&i1); 654 } 655 656 } 657 658 659 /*************************************Xxt.c************************************ 660 Function: check_init 661 662 Input : 663 Output: 664 Return: 665 Description: 666 **************************************xxt.c***********************************/ 667 static 668 void 669 check_init(void) 670 { 671 comm_init(); 672 673 } 674 675 676 /*************************************xxt.c************************************ 677 Function: check_handle() 678 679 Input : 680 Output: 681 Return: 682 Description: 683 **************************************xxt.c***********************************/ 684 static 685 void 686 check_handle(xxt_ADT xxt_handle) 687 { 688 #ifdef SAFE 689 int vals[2], work[2], op[] = {NON_UNIFORM,GL_MIN,GL_MAX}; 690 #endif 691 692 693 if (xxt_handle==NULL) 694 {error_msg_fatal("check_handle() :: bad handle :: NULL %d\n",xxt_handle);} 695 696 #ifdef SAFE 697 vals[0]=vals[1]=xxt_handle->id; 698 giop(vals,work,sizeof(op)/sizeof(op[0])-1,op); 699 if ((vals[0]!=vals[1])||(xxt_handle->id<=0)) 700 {error_msg_fatal("check_handle() :: bad handle :: id mismatch min/max %d/%d %d\n", 701 vals[0],vals[1], xxt_handle->id);} 702 #endif 703 704 } 705 706 707 /*************************************xxt.c************************************ 708 Function: det_separators 709 710 Input : 711 Output: 712 Return: 713 Description: 714 det_separators(xxt_handle, local2global, n, m, mylocmatvec, grid_data); 715 **************************************xxt.c***********************************/ 716 static 717 void 718 det_separators(xxt_ADT xxt_handle) 719 { 720 int i, ct, id; 721 int mask, edge, *iptr; 722 int *dir, *used; 723 int sum[4], w[4]; 724 PetscScalar rsum[4], rw[4]; 725 int op[] = {GL_ADD,0}; 726 PetscScalar *lhs, *rhs; 727 int *nsep, *lnsep, *fo, nfo=0; 728 gs_ADT gs_handle=xxt_handle->mvi->gs_handle; 729 int *local2global=xxt_handle->mvi->local2global; 730 int n=xxt_handle->mvi->n; 731 int m=xxt_handle->mvi->m; 732 int level=xxt_handle->level; 733 int shared=FALSE; 734 735 dir = (int*)malloc(sizeof(PetscInt)*(level+1)); 736 nsep = (int*)malloc(sizeof(PetscInt)*(level+1)); 737 lnsep= (int*)malloc(sizeof(PetscInt)*(level+1)); 738 fo = (int*)malloc(sizeof(PetscInt)*(n+1)); 739 used = (int*)malloc(sizeof(PetscInt)*n); 740 741 ivec_zero(dir ,level+1); 742 ivec_zero(nsep ,level+1); 743 ivec_zero(lnsep,level+1); 744 ivec_set (fo ,-1,n+1); 745 ivec_zero(used,n); 746 747 lhs = (double*)malloc(sizeof(PetscScalar)*m); 748 rhs = (double*)malloc(sizeof(PetscScalar)*m); 749 750 /* determine the # of unique dof */ 751 rvec_zero(lhs,m); 752 rvec_set(lhs,1.0,n); 753 gs_gop_hc(gs_handle,lhs,"+\0",level); 754 rvec_zero(rsum,2); 755 for (ct=i=0;i<n;i++) 756 { 757 if (lhs[i]!=0.0) 758 {rsum[0]+=1.0/lhs[i]; rsum[1]+=lhs[i];} 759 } 760 grop_hc(rsum,rw,2,op,level); 761 rsum[0]+=0.1; 762 rsum[1]+=0.1; 763 /* if (!my_id) 764 { 765 printf("xxt n unique = %d (%g)\n",(int) rsum[0], rsum[0]); 766 printf("xxt n shared = %d (%g)\n",(int) rsum[1], rsum[1]); 767 }*/ 768 769 if (fabs(rsum[0]-rsum[1])>EPS) 770 {shared=TRUE;} 771 772 xxt_handle->info->n_global=xxt_handle->info->m_global=(int) rsum[0]; 773 xxt_handle->mvi->n_global =xxt_handle->mvi->m_global =(int) rsum[0]; 774 775 /* determine separator sets top down */ 776 if (shared) 777 { 778 for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1) 779 { 780 /* set rsh of hc, fire, and collect lhs responses */ 781 (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m); 782 gs_gop_hc(gs_handle,lhs,"+\0",edge); 783 784 /* set lsh of hc, fire, and collect rhs responses */ 785 (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m); 786 gs_gop_hc(gs_handle,rhs,"+\0",edge); 787 788 for (i=0;i<n;i++) 789 { 790 if (id< mask) 791 { 792 if (lhs[i]!=0.0) 793 {lhs[i]=1.0;} 794 } 795 if (id>=mask) 796 { 797 if (rhs[i]!=0.0) 798 {rhs[i]=1.0;} 799 } 800 } 801 802 if (id< mask) 803 {gs_gop_hc(gs_handle,lhs,"+\0",edge-1);} 804 else 805 {gs_gop_hc(gs_handle,rhs,"+\0",edge-1);} 806 807 /* count number of dofs I own that have signal and not in sep set */ 808 rvec_zero(rsum,4); 809 for (ivec_zero(sum,4),ct=i=0;i<n;i++) 810 { 811 if (!used[i]) 812 { 813 /* number of unmarked dofs on node */ 814 ct++; 815 /* number of dofs to be marked on lhs hc */ 816 if (id< mask) 817 { 818 if (lhs[i]!=0.0) 819 {sum[0]++; rsum[0]+=1.0/lhs[i];} 820 } 821 /* number of dofs to be marked on rhs hc */ 822 if (id>=mask) 823 { 824 if (rhs[i]!=0.0) 825 {sum[1]++; rsum[1]+=1.0/rhs[i];} 826 } 827 } 828 } 829 830 /* go for load balance - choose half with most unmarked dofs, bias LHS */ 831 (id<mask) ? (sum[2]=ct) : (sum[3]=ct); 832 (id<mask) ? (rsum[2]=ct) : (rsum[3]=ct); 833 giop_hc(sum,w,4,op,edge); 834 grop_hc(rsum,rw,4,op,edge); 835 rsum[0]+=0.1; rsum[1]+=0.1; rsum[2]+=0.1; rsum[3]+=0.1; 836 837 if (id<mask) 838 { 839 /* mark dofs I own that have signal and not in sep set */ 840 for (ct=i=0;i<n;i++) 841 { 842 if ((!used[i])&&(lhs[i]!=0.0)) 843 { 844 ct++; nfo++; 845 846 if (nfo>n) 847 {error_msg_fatal("nfo about to exceed n\n");} 848 849 *--iptr = local2global[i]; 850 used[i]=edge; 851 } 852 } 853 if (ct>1) {ivec_sort(iptr,ct);} 854 855 lnsep[edge]=ct; 856 nsep[edge]=(int) rsum[0]; 857 dir [edge]=LEFT; 858 } 859 860 if (id>=mask) 861 { 862 /* mark dofs I own that have signal and not in sep set */ 863 for (ct=i=0;i<n;i++) 864 { 865 if ((!used[i])&&(rhs[i]!=0.0)) 866 { 867 ct++; nfo++; 868 869 if (nfo>n) 870 {error_msg_fatal("nfo about to exceed n\n");} 871 872 *--iptr = local2global[i]; 873 used[i]=edge; 874 } 875 } 876 if (ct>1) {ivec_sort(iptr,ct);} 877 878 lnsep[edge]=ct; 879 nsep[edge]= (int) rsum[1]; 880 dir [edge]=RIGHT; 881 } 882 883 /* LATER or we can recur on these to order seps at this level */ 884 /* do we need full set of separators for this? */ 885 886 /* fold rhs hc into lower */ 887 if (id>=mask) 888 {id-=mask;} 889 } 890 } 891 else 892 { 893 for (iptr=fo+n,id=my_id,mask=num_nodes>>1,edge=level;edge>0;edge--,mask>>=1) 894 { 895 /* set rsh of hc, fire, and collect lhs responses */ 896 (id<mask) ? rvec_zero(lhs,m) : rvec_set(lhs,1.0,m); 897 gs_gop_hc(gs_handle,lhs,"+\0",edge); 898 899 /* set lsh of hc, fire, and collect rhs responses */ 900 (id<mask) ? rvec_set(rhs,1.0,m) : rvec_zero(rhs,m); 901 gs_gop_hc(gs_handle,rhs,"+\0",edge); 902 903 /* count number of dofs I own that have signal and not in sep set */ 904 for (ivec_zero(sum,4),ct=i=0;i<n;i++) 905 { 906 if (!used[i]) 907 { 908 /* number of unmarked dofs on node */ 909 ct++; 910 /* number of dofs to be marked on lhs hc */ 911 if ((id< mask)&&(lhs[i]!=0.0)) {sum[0]++;} 912 /* number of dofs to be marked on rhs hc */ 913 if ((id>=mask)&&(rhs[i]!=0.0)) {sum[1]++;} 914 } 915 } 916 917 /* go for load balance - choose half with most unmarked dofs, bias LHS */ 918 (id<mask) ? (sum[2]=ct) : (sum[3]=ct); 919 giop_hc(sum,w,4,op,edge); 920 921 /* lhs hc wins */ 922 if (sum[2]>=sum[3]) 923 { 924 if (id<mask) 925 { 926 /* mark dofs I own that have signal and not in sep set */ 927 for (ct=i=0;i<n;i++) 928 { 929 if ((!used[i])&&(lhs[i]!=0.0)) 930 { 931 ct++; nfo++; 932 *--iptr = local2global[i]; 933 used[i]=edge; 934 } 935 } 936 if (ct>1) {ivec_sort(iptr,ct);} 937 lnsep[edge]=ct; 938 } 939 nsep[edge]=sum[0]; 940 dir [edge]=LEFT; 941 } 942 /* rhs hc wins */ 943 else 944 { 945 if (id>=mask) 946 { 947 /* mark dofs I own that have signal and not in sep set */ 948 for (ct=i=0;i<n;i++) 949 { 950 if ((!used[i])&&(rhs[i]!=0.0)) 951 { 952 ct++; nfo++; 953 *--iptr = local2global[i]; 954 used[i]=edge; 955 } 956 } 957 if (ct>1) {ivec_sort(iptr,ct);} 958 lnsep[edge]=ct; 959 } 960 nsep[edge]=sum[1]; 961 dir [edge]=RIGHT; 962 } 963 /* LATER or we can recur on these to order seps at this level */ 964 /* do we need full set of separators for this? */ 965 966 /* fold rhs hc into lower */ 967 if (id>=mask) 968 {id-=mask;} 969 } 970 } 971 972 973 /* level 0 is on processor case - so mark the remainder */ 974 for (ct=i=0;i<n;i++) 975 { 976 if (!used[i]) 977 { 978 ct++; nfo++; 979 *--iptr = local2global[i]; 980 used[i]=edge; 981 } 982 } 983 if (ct>1) {ivec_sort(iptr,ct);} 984 lnsep[edge]=ct; 985 nsep [edge]=ct; 986 dir [edge]=LEFT; 987 988 xxt_handle->info->nsep=nsep; 989 xxt_handle->info->lnsep=lnsep; 990 xxt_handle->info->fo=fo; 991 xxt_handle->info->nfo=nfo; 992 993 free(dir); 994 free(lhs); 995 free(rhs); 996 free(used); 997 998 } 999 1000 1001 /*************************************xxt.c************************************ 1002 Function: set_mvi 1003 1004 Input : 1005 Output: 1006 Return: 1007 Description: 1008 **************************************xxt.c***********************************/ 1009 static 1010 mv_info *set_mvi(int *local2global, int n, int m, void *matvec, void *grid_data) 1011 { 1012 mv_info *mvi; 1013 1014 1015 mvi = (mv_info*)malloc(sizeof(mv_info)); 1016 mvi->n=n; 1017 mvi->m=m; 1018 mvi->n_global=-1; 1019 mvi->m_global=-1; 1020 mvi->local2global=(int*)malloc((m+1)*sizeof(PetscInt)); 1021 ivec_copy(mvi->local2global,local2global,m); 1022 mvi->local2global[m] = INT_MAX; 1023 mvi->matvec=(PetscErrorCode (*)(mv_info*,PetscScalar*,PetscScalar*))matvec; 1024 mvi->grid_data=grid_data; 1025 1026 /* set xxt communication handle to perform restricted matvec */ 1027 mvi->gs_handle = gs_init(local2global, m, num_nodes); 1028 1029 return(mvi); 1030 } 1031 1032 1033 /*************************************xxt.c************************************ 1034 Function: set_mvi 1035 1036 Input : 1037 Output: 1038 Return: 1039 Description: 1040 1041 computes u = A.v 1042 do_matvec(xxt_handle->mvi,v,u); 1043 **************************************xxt.c***********************************/ 1044 static 1045 void do_matvec(mv_info *A, PetscScalar *v, PetscScalar *u) 1046 { 1047 A->matvec((mv_info*)A->grid_data,v,u); 1048 } 1049 1050 1051 1052