1*c4fb69feSStefano Zampini static char help[] = "Tests MATHARA\n\n"; 2*c4fb69feSStefano Zampini 3*c4fb69feSStefano Zampini #include <petscmat.h> 4*c4fb69feSStefano Zampini #include <petscsf.h> 5*c4fb69feSStefano Zampini 6*c4fb69feSStefano Zampini static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx) 7*c4fb69feSStefano Zampini { 8*c4fb69feSStefano Zampini PetscInt d; 9*c4fb69feSStefano Zampini PetscReal clength = sdim == 3 ? 0.2 : 0.1; 10*c4fb69feSStefano Zampini PetscReal dist, diff = 0.0; 11*c4fb69feSStefano Zampini 12*c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); } 13*c4fb69feSStefano Zampini dist = PetscSqrtReal(diff); 14*c4fb69feSStefano Zampini return PetscExpReal(-dist / clength); 15*c4fb69feSStefano Zampini } 16*c4fb69feSStefano Zampini 17*c4fb69feSStefano Zampini static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx) 18*c4fb69feSStefano Zampini { 19*c4fb69feSStefano Zampini PetscInt d; 20*c4fb69feSStefano Zampini PetscReal clength = sdim == 3 ? 0.2 : 0.1; 21*c4fb69feSStefano Zampini PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0; 22*c4fb69feSStefano Zampini 23*c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; } 24*c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; } 25*c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); } 26*c4fb69feSStefano Zampini dist = PetscSqrtReal(diff); 27*c4fb69feSStefano Zampini return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.; 28*c4fb69feSStefano Zampini } 29*c4fb69feSStefano Zampini 30*c4fb69feSStefano Zampini int main(int argc,char **argv) 31*c4fb69feSStefano Zampini { 32*c4fb69feSStefano Zampini Mat A,B,C,D; 33*c4fb69feSStefano Zampini Vec v,x,y,Ax,Ay,Bx,By; 34*c4fb69feSStefano Zampini PetscRandom r; 35*c4fb69feSStefano Zampini PetscLayout map; 36*c4fb69feSStefano Zampini PetscScalar *Adata = NULL, *Cdata = NULL, scale = 1.0; 37*c4fb69feSStefano Zampini PetscReal *coords,nA,nD,nB,err,nX,norms[3]; 38*c4fb69feSStefano Zampini PetscInt N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2; 39*c4fb69feSStefano Zampini PetscMPIInt size,rank; 40*c4fb69feSStefano Zampini PetscBool testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE; 41*c4fb69feSStefano Zampini PetscBool checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE; 42*c4fb69feSStefano Zampini PetscBool testtrans, testnorm, randommat = PETSC_TRUE; 43*c4fb69feSStefano Zampini void (*approxnormfunc)(void); 44*c4fb69feSStefano Zampini void (*Anormfunc)(void); 45*c4fb69feSStefano Zampini PetscErrorCode ierr; 46*c4fb69feSStefano Zampini 47*c4fb69feSStefano Zampini PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE; 48*c4fb69feSStefano Zampini ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 49*c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 50*c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); 51*c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);CHKERRQ(ierr); 52*c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);CHKERRQ(ierr); 53*c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);CHKERRQ(ierr); 54*c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);CHKERRQ(ierr); 55*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);CHKERRQ(ierr); 56*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);CHKERRQ(ierr); 57*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);CHKERRQ(ierr); 58*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr); 59*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);CHKERRQ(ierr); 60*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);CHKERRQ(ierr); 61*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);CHKERRQ(ierr); 62*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr); 63*c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);CHKERRQ(ierr); 64*c4fb69feSStefano Zampini ierr = PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);CHKERRQ(ierr); 65*c4fb69feSStefano Zampini if (!Asymm) symm = PETSC_FALSE; 66*c4fb69feSStefano Zampini 67*c4fb69feSStefano Zampini ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 68*c4fb69feSStefano Zampini /* MatMultTranspose for nonsymmetric matrices not implemented */ 69*c4fb69feSStefano Zampini testtrans = (PetscBool)(size == 1 || symm); 70*c4fb69feSStefano Zampini testnorm = (PetscBool)(size == 1 || symm); 71*c4fb69feSStefano Zampini 72*c4fb69feSStefano Zampini ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 73*c4fb69feSStefano Zampini ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr); 74*c4fb69feSStefano Zampini if (testlayout) { 75*c4fb69feSStefano Zampini if (rank%2) n = PetscMax(2*n-5*rank,0); 76*c4fb69feSStefano Zampini else n = 2*n+rank; 77*c4fb69feSStefano Zampini } 78*c4fb69feSStefano Zampini ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr); 79*c4fb69feSStefano Zampini ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 80*c4fb69feSStefano Zampini ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 81*c4fb69feSStefano Zampini ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 82*c4fb69feSStefano Zampini 83*c4fb69feSStefano Zampini ierr = PetscRandomCreate(PETSC_COMM_WORLD,&r);CHKERRQ(ierr); 84*c4fb69feSStefano Zampini ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 85*c4fb69feSStefano Zampini if (lda) { 86*c4fb69feSStefano Zampini ierr = PetscMalloc1(N*(n+lda),&Adata);CHKERRQ(ierr); 87*c4fb69feSStefano Zampini } 88*c4fb69feSStefano Zampini ierr = MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);CHKERRQ(ierr); 89*c4fb69feSStefano Zampini ierr = MatDenseSetLDA(A,n+lda);CHKERRQ(ierr); 90*c4fb69feSStefano Zampini ierr = PetscMalloc1(n*dim,&coords);CHKERRQ(ierr); 91*c4fb69feSStefano Zampini for (j = 0; j < n; j++) { 92*c4fb69feSStefano Zampini for (i = 0; i < dim; i++) { 93*c4fb69feSStefano Zampini PetscScalar a; 94*c4fb69feSStefano Zampini 95*c4fb69feSStefano Zampini ierr = PetscRandomGetValue(r,&a);CHKERRQ(ierr); 96*c4fb69feSStefano Zampini coords[j*dim + i] = PetscRealPart(a); 97*c4fb69feSStefano Zampini } 98*c4fb69feSStefano Zampini } 99*c4fb69feSStefano Zampini if (kernel || !randommat) { 100*c4fb69feSStefano Zampini MatHaraKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm; 101*c4fb69feSStefano Zampini PetscInt ist,ien; 102*c4fb69feSStefano Zampini PetscReal *gcoords; 103*c4fb69feSStefano Zampini 104*c4fb69feSStefano Zampini if (size > 1) { /* replicated coords so that we can populate the dense matrix */ 105*c4fb69feSStefano Zampini PetscSF sf; 106*c4fb69feSStefano Zampini MPI_Datatype dtype; 107*c4fb69feSStefano Zampini 108*c4fb69feSStefano Zampini ierr = MPI_Type_contiguous(dim,MPIU_REAL,&dtype);CHKERRQ(ierr); 109*c4fb69feSStefano Zampini ierr = MPI_Type_commit(&dtype);CHKERRQ(ierr); 110*c4fb69feSStefano Zampini 111*c4fb69feSStefano Zampini ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr); 112*c4fb69feSStefano Zampini ierr = MatGetLayouts(A,&map,NULL);CHKERRQ(ierr); 113*c4fb69feSStefano Zampini ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr); 114*c4fb69feSStefano Zampini ierr = PetscMalloc1(dim*N,&gcoords);CHKERRQ(ierr); 115*c4fb69feSStefano Zampini ierr = PetscSFBcastBegin(sf,dtype,coords,gcoords);CHKERRQ(ierr); 116*c4fb69feSStefano Zampini ierr = PetscSFBcastEnd(sf,dtype,coords,gcoords);CHKERRQ(ierr); 117*c4fb69feSStefano Zampini ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 118*c4fb69feSStefano Zampini ierr = MPI_Type_free(&dtype);CHKERRQ(ierr); 119*c4fb69feSStefano Zampini } else gcoords = (PetscReal*)coords; 120*c4fb69feSStefano Zampini 121*c4fb69feSStefano Zampini ierr = MatGetOwnershipRange(A,&ist,&ien);CHKERRQ(ierr); 122*c4fb69feSStefano Zampini for (i = ist; i < ien; i++) { 123*c4fb69feSStefano Zampini for (j = 0; j < N; j++) { 124*c4fb69feSStefano Zampini ierr = MatSetValue(A,i,j,(*k)(dim,gcoords + i*dim,gcoords + j*dim,NULL),INSERT_VALUES);CHKERRQ(ierr); 125*c4fb69feSStefano Zampini } 126*c4fb69feSStefano Zampini } 127*c4fb69feSStefano Zampini ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 128*c4fb69feSStefano Zampini ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 129*c4fb69feSStefano Zampini if (kernel) { 130*c4fb69feSStefano Zampini ierr = MatCreateHaraFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr); 131*c4fb69feSStefano Zampini } else { 132*c4fb69feSStefano Zampini ierr = MatCreateHaraFromMat(A,dim,coords,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr); 133*c4fb69feSStefano Zampini } 134*c4fb69feSStefano Zampini if (gcoords != coords) { ierr = PetscFree(gcoords);CHKERRQ(ierr); } 135*c4fb69feSStefano Zampini } else { 136*c4fb69feSStefano Zampini ierr = MatSetRandom(A,r);CHKERRQ(ierr); 137*c4fb69feSStefano Zampini if (Asymm) { 138*c4fb69feSStefano Zampini ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 139*c4fb69feSStefano Zampini ierr = MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 140*c4fb69feSStefano Zampini ierr = MatDestroy(&B);CHKERRQ(ierr); 141*c4fb69feSStefano Zampini ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 142*c4fb69feSStefano Zampini } 143*c4fb69feSStefano Zampini ierr = MatCreateHaraFromMat(A,dim,coords,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr); 144*c4fb69feSStefano Zampini } 145*c4fb69feSStefano Zampini if (agpu) { 146*c4fb69feSStefano Zampini ierr = MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 147*c4fb69feSStefano Zampini } 148*c4fb69feSStefano Zampini ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr); 149*c4fb69feSStefano Zampini 150*c4fb69feSStefano Zampini ierr = MatSetOption(B,MAT_SYMMETRIC,symm);CHKERRQ(ierr); 151*c4fb69feSStefano Zampini ierr = PetscFree(coords);CHKERRQ(ierr); 152*c4fb69feSStefano Zampini 153*c4fb69feSStefano Zampini /* assemble the H-matrix */ 154*c4fb69feSStefano Zampini ierr = MatBindToCPU(B,(PetscBool)!bgpu);CHKERRQ(ierr); 155*c4fb69feSStefano Zampini ierr = MatSetFromOptions(B);CHKERRQ(ierr); 156*c4fb69feSStefano Zampini ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 157*c4fb69feSStefano Zampini ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 158*c4fb69feSStefano Zampini ierr = MatViewFromOptions(B,NULL,"-B_view");CHKERRQ(ierr); 159*c4fb69feSStefano Zampini 160*c4fb69feSStefano Zampini /* Test MatScale */ 161*c4fb69feSStefano Zampini ierr = MatScale(A,scale);CHKERRQ(ierr); 162*c4fb69feSStefano Zampini ierr = MatScale(B,scale);CHKERRQ(ierr); 163*c4fb69feSStefano Zampini 164*c4fb69feSStefano Zampini /* Test MatMult */ 165*c4fb69feSStefano Zampini ierr = MatCreateVecs(A,&Ax,&Ay);CHKERRQ(ierr); 166*c4fb69feSStefano Zampini ierr = MatCreateVecs(B,&Bx,&By);CHKERRQ(ierr); 167*c4fb69feSStefano Zampini ierr = VecSetRandom(Ax,r);CHKERRQ(ierr); 168*c4fb69feSStefano Zampini ierr = VecCopy(Ax,Bx);CHKERRQ(ierr); 169*c4fb69feSStefano Zampini ierr = MatMult(A,Ax,Ay);CHKERRQ(ierr); 170*c4fb69feSStefano Zampini ierr = MatMult(B,Bx,By);CHKERRQ(ierr); 171*c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr); 172*c4fb69feSStefano Zampini ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr); 173*c4fb69feSStefano Zampini ierr = VecNorm(Ay,NORM_INFINITY,&nX);CHKERRQ(ierr); 174*c4fb69feSStefano Zampini ierr = VecAXPY(Ay,-1.0,By);CHKERRQ(ierr); 175*c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr); 176*c4fb69feSStefano Zampini ierr = VecNorm(Ay,NORM_INFINITY,&err);CHKERRQ(ierr); 177*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);CHKERRQ(ierr); 178*c4fb69feSStefano Zampini ierr = VecScale(By,-1.0);CHKERRQ(ierr); 179*c4fb69feSStefano Zampini ierr = MatMultAdd(B,Bx,By,By);CHKERRQ(ierr); 180*c4fb69feSStefano Zampini ierr = VecNorm(By,NORM_INFINITY,&err);CHKERRQ(ierr); 181*c4fb69feSStefano Zampini ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr); 182*c4fb69feSStefano Zampini if (err > PETSC_SMALL) { 183*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);CHKERRQ(ierr); 184*c4fb69feSStefano Zampini } 185*c4fb69feSStefano Zampini 186*c4fb69feSStefano Zampini /* Test MatNorm */ 187*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr); 188*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr); 189*c4fb69feSStefano Zampini norms[2] = -1.; /* NORM_2 not supported */ 190*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr); 191*c4fb69feSStefano Zampini ierr = MatGetOperation(A,MATOP_NORM,&Anormfunc);CHKERRQ(ierr); 192*c4fb69feSStefano Zampini ierr = MatGetOperation(B,MATOP_NORM,&approxnormfunc);CHKERRQ(ierr); 193*c4fb69feSStefano Zampini ierr = MatSetOperation(A,MATOP_NORM,approxnormfunc);CHKERRQ(ierr); 194*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr); 195*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr); 196*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_2,&norms[2]);CHKERRQ(ierr); 197*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr); 198*c4fb69feSStefano Zampini if (testnorm) { 199*c4fb69feSStefano Zampini ierr = MatNorm(B,NORM_INFINITY,&norms[0]);CHKERRQ(ierr); 200*c4fb69feSStefano Zampini ierr = MatNorm(B,NORM_1,&norms[1]);CHKERRQ(ierr); 201*c4fb69feSStefano Zampini ierr = MatNorm(B,NORM_2,&norms[2]);CHKERRQ(ierr); 202*c4fb69feSStefano Zampini } else { 203*c4fb69feSStefano Zampini norms[0] = -1.; 204*c4fb69feSStefano Zampini norms[1] = -1.; 205*c4fb69feSStefano Zampini norms[2] = -1.; 206*c4fb69feSStefano Zampini } 207*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr); 208*c4fb69feSStefano Zampini ierr = MatSetOperation(A,MATOP_NORM,Anormfunc);CHKERRQ(ierr); 209*c4fb69feSStefano Zampini 210*c4fb69feSStefano Zampini /* Test MatDuplicate */ 211*c4fb69feSStefano Zampini ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr); 212*c4fb69feSStefano Zampini ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr); 213*c4fb69feSStefano Zampini if (!flg) { 214*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");CHKERRQ(ierr); 215*c4fb69feSStefano Zampini } 216*c4fb69feSStefano Zampini if (testtrans) { 217*c4fb69feSStefano Zampini ierr = MatMultTransposeEqual(B,D,10,&flg);CHKERRQ(ierr); 218*c4fb69feSStefano Zampini if (!flg) { 219*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranpose error after MatDuplicate\n");CHKERRQ(ierr); 220*c4fb69feSStefano Zampini } 221*c4fb69feSStefano Zampini } 222*c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 223*c4fb69feSStefano Zampini 224*c4fb69feSStefano Zampini if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */ 225*c4fb69feSStefano Zampini ierr = VecSetRandom(Ay,r);CHKERRQ(ierr); 226*c4fb69feSStefano Zampini ierr = VecCopy(Ay,By);CHKERRQ(ierr); 227*c4fb69feSStefano Zampini ierr = MatMultTranspose(A,Ay,Ax);CHKERRQ(ierr); 228*c4fb69feSStefano Zampini ierr = MatMultTranspose(B,By,Bx);CHKERRQ(ierr); 229*c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr); 230*c4fb69feSStefano Zampini ierr = VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");CHKERRQ(ierr); 231*c4fb69feSStefano Zampini ierr = VecNorm(Ax,NORM_INFINITY,&nX);CHKERRQ(ierr); 232*c4fb69feSStefano Zampini ierr = VecAXPY(Ax,-1.0,Bx);CHKERRQ(ierr); 233*c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr); 234*c4fb69feSStefano Zampini ierr = VecNorm(Ax,NORM_INFINITY,&err);CHKERRQ(ierr); 235*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);CHKERRQ(ierr); 236*c4fb69feSStefano Zampini ierr = VecScale(Bx,-1.0);CHKERRQ(ierr); 237*c4fb69feSStefano Zampini ierr = MatMultTransposeAdd(B,By,Bx,Bx);CHKERRQ(ierr); 238*c4fb69feSStefano Zampini ierr = VecNorm(Bx,NORM_INFINITY,&err);CHKERRQ(ierr); 239*c4fb69feSStefano Zampini if (err > PETSC_SMALL) { 240*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);CHKERRQ(ierr); 241*c4fb69feSStefano Zampini } 242*c4fb69feSStefano Zampini } 243*c4fb69feSStefano Zampini ierr = VecDestroy(&Ax);CHKERRQ(ierr); 244*c4fb69feSStefano Zampini ierr = VecDestroy(&Ay);CHKERRQ(ierr); 245*c4fb69feSStefano Zampini ierr = VecDestroy(&Bx);CHKERRQ(ierr); 246*c4fb69feSStefano Zampini ierr = VecDestroy(&By);CHKERRQ(ierr); 247*c4fb69feSStefano Zampini 248*c4fb69feSStefano Zampini /* Test MatMatMult */ 249*c4fb69feSStefano Zampini if (ldc) { 250*c4fb69feSStefano Zampini ierr = PetscMalloc1(nrhs*(n+ldc),&Cdata);CHKERRQ(ierr); 251*c4fb69feSStefano Zampini } 252*c4fb69feSStefano Zampini ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);CHKERRQ(ierr); 253*c4fb69feSStefano Zampini ierr = MatDenseSetLDA(C,n+ldc);CHKERRQ(ierr); 254*c4fb69feSStefano Zampini ierr = MatSetRandom(C,r);CHKERRQ(ierr); 255*c4fb69feSStefano Zampini if (cgpu) { 256*c4fb69feSStefano Zampini ierr = MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr); 257*c4fb69feSStefano Zampini } 258*c4fb69feSStefano Zampini for (nt = 0; nt < ntrials; nt++) { 259*c4fb69feSStefano Zampini ierr = MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr); 260*c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-bc_view");CHKERRQ(ierr); 261*c4fb69feSStefano Zampini ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr); 262*c4fb69feSStefano Zampini if (flg) { 263*c4fb69feSStefano Zampini ierr = MatCreateVecs(B,&x,&y);CHKERRQ(ierr); 264*c4fb69feSStefano Zampini ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr); 265*c4fb69feSStefano Zampini for (i = 0; i < nrhs; i++) { 266*c4fb69feSStefano Zampini ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr); 267*c4fb69feSStefano Zampini ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr); 268*c4fb69feSStefano Zampini ierr = MatMult(B,x,y);CHKERRQ(ierr); 269*c4fb69feSStefano Zampini ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr); 270*c4fb69feSStefano Zampini ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr); 271*c4fb69feSStefano Zampini if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err);CHKERRQ(ierr); } 272*c4fb69feSStefano Zampini } 273*c4fb69feSStefano Zampini ierr = VecDestroy(&y);CHKERRQ(ierr); 274*c4fb69feSStefano Zampini ierr = VecDestroy(&x);CHKERRQ(ierr); 275*c4fb69feSStefano Zampini ierr = VecDestroy(&v);CHKERRQ(ierr); 276*c4fb69feSStefano Zampini } 277*c4fb69feSStefano Zampini } 278*c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 279*c4fb69feSStefano Zampini 280*c4fb69feSStefano Zampini /* Test MatTransposeMatMult */ 281*c4fb69feSStefano Zampini if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */ 282*c4fb69feSStefano Zampini for (nt = 0; nt < ntrials; nt++) { 283*c4fb69feSStefano Zampini ierr = MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr); 284*c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-btc_view");CHKERRQ(ierr); 285*c4fb69feSStefano Zampini ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr); 286*c4fb69feSStefano Zampini if (flg) { 287*c4fb69feSStefano Zampini ierr = MatCreateVecs(B,&y,&x);CHKERRQ(ierr); 288*c4fb69feSStefano Zampini ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr); 289*c4fb69feSStefano Zampini for (i = 0; i < nrhs; i++) { 290*c4fb69feSStefano Zampini ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr); 291*c4fb69feSStefano Zampini ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr); 292*c4fb69feSStefano Zampini ierr = MatMultTranspose(B,x,y);CHKERRQ(ierr); 293*c4fb69feSStefano Zampini ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr); 294*c4fb69feSStefano Zampini ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr); 295*c4fb69feSStefano Zampini if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err);CHKERRQ(ierr); } 296*c4fb69feSStefano Zampini } 297*c4fb69feSStefano Zampini ierr = VecDestroy(&y);CHKERRQ(ierr); 298*c4fb69feSStefano Zampini ierr = VecDestroy(&x);CHKERRQ(ierr); 299*c4fb69feSStefano Zampini ierr = VecDestroy(&v);CHKERRQ(ierr); 300*c4fb69feSStefano Zampini } 301*c4fb69feSStefano Zampini } 302*c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 303*c4fb69feSStefano Zampini } 304*c4fb69feSStefano Zampini 305*c4fb69feSStefano Zampini /* check explicit operator */ 306*c4fb69feSStefano Zampini if (checkexpl) { 307*c4fb69feSStefano Zampini Mat Be, Bet; 308*c4fb69feSStefano Zampini 309*c4fb69feSStefano Zampini ierr = MatComputeOperator(B,MATDENSE,&D);CHKERRQ(ierr); 310*c4fb69feSStefano Zampini ierr = MatDuplicate(D,MAT_COPY_VALUES,&Be);CHKERRQ(ierr); 311*c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr); 312*c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-expl_view");CHKERRQ(ierr); 313*c4fb69feSStefano Zampini ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 314*c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-diff_view");CHKERRQ(ierr); 315*c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr); 316*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr); 317*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr); 318*c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 319*c4fb69feSStefano Zampini 320*c4fb69feSStefano Zampini if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */ 321*c4fb69feSStefano Zampini ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 322*c4fb69feSStefano Zampini ierr = MatComputeOperatorTranspose(B,MATDENSE,&D);CHKERRQ(ierr); 323*c4fb69feSStefano Zampini ierr = MatDuplicate(D,MAT_COPY_VALUES,&Bet);CHKERRQ(ierr); 324*c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr); 325*c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-expl_trans_view");CHKERRQ(ierr); 326*c4fb69feSStefano Zampini ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 327*c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-diff_trans_view");CHKERRQ(ierr); 328*c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr); 329*c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr); 330*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr); 331*c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 332*c4fb69feSStefano Zampini 333*c4fb69feSStefano Zampini ierr = MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);CHKERRQ(ierr); 334*c4fb69feSStefano Zampini ierr = MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 335*c4fb69feSStefano Zampini ierr = MatViewFromOptions(Be,NULL,"-diff_expl_view");CHKERRQ(ierr); 336*c4fb69feSStefano Zampini ierr = MatNorm(Be,NORM_FROBENIUS,&nB);CHKERRQ(ierr); 337*c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);CHKERRQ(ierr); 338*c4fb69feSStefano Zampini ierr = MatDestroy(&Be);CHKERRQ(ierr); 339*c4fb69feSStefano Zampini ierr = MatDestroy(&Bet);CHKERRQ(ierr); 340*c4fb69feSStefano Zampini } 341*c4fb69feSStefano Zampini } 342*c4fb69feSStefano Zampini ierr = MatDestroy(&A);CHKERRQ(ierr); 343*c4fb69feSStefano Zampini ierr = MatDestroy(&B);CHKERRQ(ierr); 344*c4fb69feSStefano Zampini ierr = MatDestroy(&C);CHKERRQ(ierr); 345*c4fb69feSStefano Zampini ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); 346*c4fb69feSStefano Zampini ierr = PetscFree(Cdata);CHKERRQ(ierr); 347*c4fb69feSStefano Zampini ierr = PetscFree(Adata);CHKERRQ(ierr); 348*c4fb69feSStefano Zampini ierr = PetscFinalize(); 349*c4fb69feSStefano Zampini return ierr; 350*c4fb69feSStefano Zampini } 351*c4fb69feSStefano Zampini 352*c4fb69feSStefano Zampini /*TEST 353*c4fb69feSStefano Zampini 354*c4fb69feSStefano Zampini build: 355*c4fb69feSStefano Zampini requires: hara 356*c4fb69feSStefano Zampini 357*c4fb69feSStefano Zampini #tests from kernel 358*c4fb69feSStefano Zampini test: 359*c4fb69feSStefano Zampini requires: hara 360*c4fb69feSStefano Zampini nsize: 1 361*c4fb69feSStefano Zampini suffix: 1 362*c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0 363*c4fb69feSStefano Zampini 364*c4fb69feSStefano Zampini test: 365*c4fb69feSStefano Zampini requires: hara 366*c4fb69feSStefano Zampini nsize: 1 367*c4fb69feSStefano Zampini suffix: 1_ld 368*c4fb69feSStefano Zampini output_file: output/ex66_1.out 369*c4fb69feSStefano Zampini args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0 370*c4fb69feSStefano Zampini 371*c4fb69feSStefano Zampini test: 372*c4fb69feSStefano Zampini requires: hara cuda 373*c4fb69feSStefano Zampini nsize: 1 374*c4fb69feSStefano Zampini suffix: 1_cuda 375*c4fb69feSStefano Zampini output_file: output/ex66_1.out 376*c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1 377*c4fb69feSStefano Zampini 378*c4fb69feSStefano Zampini test: 379*c4fb69feSStefano Zampini requires: hara cuda 380*c4fb69feSStefano Zampini nsize: 1 381*c4fb69feSStefano Zampini suffix: 1_cuda_ld 382*c4fb69feSStefano Zampini output_file: output/ex66_1.out 383*c4fb69feSStefano Zampini args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1 384*c4fb69feSStefano Zampini 385*c4fb69feSStefano Zampini test: 386*c4fb69feSStefano Zampini requires: hara define(PETSC_HAVE_MPI_INIT_THREAD) 387*c4fb69feSStefano Zampini nsize: 2 388*c4fb69feSStefano Zampini suffix: 1_par 389*c4fb69feSStefano Zampini args: -n 32 -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0 390*c4fb69feSStefano Zampini 391*c4fb69feSStefano Zampini test: 392*c4fb69feSStefano Zampini requires: hara cuda define(PETSC_HAVE_MPI_INIT_THREAD) 393*c4fb69feSStefano Zampini nsize: 2 394*c4fb69feSStefano Zampini suffix: 1_par_cuda 395*c4fb69feSStefano Zampini args: -n 32 -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}} 396*c4fb69feSStefano Zampini output_file: output/ex66_1_par.out 397*c4fb69feSStefano Zampini 398*c4fb69feSStefano Zampini #tests from matrix sampling (parallel or unsymmetric not supported) 399*c4fb69feSStefano Zampini test: 400*c4fb69feSStefano Zampini requires: hara 401*c4fb69feSStefano Zampini nsize: 1 402*c4fb69feSStefano Zampini suffix: 2 403*c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0 404*c4fb69feSStefano Zampini 405*c4fb69feSStefano Zampini test: 406*c4fb69feSStefano Zampini requires: hara cuda 407*c4fb69feSStefano Zampini nsize: 1 408*c4fb69feSStefano Zampini suffix: 2_cuda 409*c4fb69feSStefano Zampini output_file: output/ex66_2.out 410*c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}} 411*c4fb69feSStefano Zampini 412*c4fb69feSStefano Zampini TEST*/ 413