1*53022affSStefano Zampini static char help[] = "Tests MATH2OPUS\n\n"; 2c4fb69feSStefano Zampini 3c4fb69feSStefano Zampini #include <petscmat.h> 4c4fb69feSStefano Zampini #include <petscsf.h> 5c4fb69feSStefano Zampini 6c4fb69feSStefano Zampini static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx) 7c4fb69feSStefano Zampini { 8c4fb69feSStefano Zampini PetscInt d; 9c4fb69feSStefano Zampini PetscReal clength = sdim == 3 ? 0.2 : 0.1; 10c4fb69feSStefano Zampini PetscReal dist, diff = 0.0; 11c4fb69feSStefano Zampini 12c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); } 13c4fb69feSStefano Zampini dist = PetscSqrtReal(diff); 14c4fb69feSStefano Zampini return PetscExpReal(-dist / clength); 15c4fb69feSStefano Zampini } 16c4fb69feSStefano Zampini 17c4fb69feSStefano Zampini static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx) 18c4fb69feSStefano Zampini { 19c4fb69feSStefano Zampini PetscInt d; 20c4fb69feSStefano Zampini PetscReal clength = sdim == 3 ? 0.2 : 0.1; 21c4fb69feSStefano Zampini PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0; 22c4fb69feSStefano Zampini 23c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; } 24c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; } 25c4fb69feSStefano Zampini for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); } 26c4fb69feSStefano Zampini dist = PetscSqrtReal(diff); 27c4fb69feSStefano Zampini return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.; 28c4fb69feSStefano Zampini } 29c4fb69feSStefano Zampini 30c4fb69feSStefano Zampini int main(int argc,char **argv) 31c4fb69feSStefano Zampini { 32c4fb69feSStefano Zampini Mat A,B,C,D; 33c4fb69feSStefano Zampini Vec v,x,y,Ax,Ay,Bx,By; 34c4fb69feSStefano Zampini PetscRandom r; 35c4fb69feSStefano Zampini PetscLayout map; 36c4fb69feSStefano Zampini PetscScalar *Adata = NULL, *Cdata = NULL, scale = 1.0; 37c4fb69feSStefano Zampini PetscReal *coords,nA,nD,nB,err,nX,norms[3]; 38c4fb69feSStefano Zampini PetscInt N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2; 39c4fb69feSStefano Zampini PetscMPIInt size,rank; 40c4fb69feSStefano Zampini PetscBool testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE; 41*53022affSStefano Zampini PetscBool checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob; 42*53022affSStefano Zampini PetscBool testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress; 43c4fb69feSStefano Zampini void (*approxnormfunc)(void); 44c4fb69feSStefano Zampini void (*Anormfunc)(void); 45c4fb69feSStefano Zampini PetscErrorCode ierr; 46c4fb69feSStefano Zampini 47*53022affSStefano Zampini #if defined(PETSC_HAVE_MPI_INIT_THREAD) 48c4fb69feSStefano Zampini PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE; 49*53022affSStefano Zampini #endif 50c4fb69feSStefano Zampini ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr; 51*53022affSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-ng",&N,&flgglob);CHKERRQ(ierr); 52c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr); 53c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr); 54c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);CHKERRQ(ierr); 55c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);CHKERRQ(ierr); 56c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);CHKERRQ(ierr); 57c4fb69feSStefano Zampini ierr = PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);CHKERRQ(ierr); 58c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);CHKERRQ(ierr); 59*53022affSStefano Zampini if (!flgglob) { ierr = PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);CHKERRQ(ierr); } 60c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);CHKERRQ(ierr); 61c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr); 62c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);CHKERRQ(ierr); 63c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);CHKERRQ(ierr); 64c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);CHKERRQ(ierr); 65c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr); 66c4fb69feSStefano Zampini ierr = PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);CHKERRQ(ierr); 67c4fb69feSStefano Zampini ierr = PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);CHKERRQ(ierr); 68c4fb69feSStefano Zampini if (!Asymm) symm = PETSC_FALSE; 69c4fb69feSStefano Zampini 70ffc4695bSBarry Smith ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 71*53022affSStefano Zampini /* MatMultTranspose for nonsymmetric matrices in parallel not implemented */ 72c4fb69feSStefano Zampini testtrans = (PetscBool)(size == 1 || symm); 73c4fb69feSStefano Zampini testnorm = (PetscBool)(size == 1 || symm); 74*53022affSStefano Zampini testorthog = (PetscBool)(size == 1 || symm); 75*53022affSStefano Zampini testcompress = (PetscBool)(size == 1 || symm); 76c4fb69feSStefano Zampini 77ffc4695bSBarry Smith ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr); 78c4fb69feSStefano Zampini ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr); 79c4fb69feSStefano Zampini if (testlayout) { 80c4fb69feSStefano Zampini if (rank%2) n = PetscMax(2*n-5*rank,0); 81c4fb69feSStefano Zampini else n = 2*n+rank; 82c4fb69feSStefano Zampini } 83*53022affSStefano Zampini if (!flgglob) { 84c4fb69feSStefano Zampini ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr); 85c4fb69feSStefano Zampini ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 86c4fb69feSStefano Zampini ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 87*53022affSStefano Zampini } else { 88*53022affSStefano Zampini ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr); 89*53022affSStefano Zampini ierr = PetscLayoutSetUp(map);CHKERRQ(ierr); 90*53022affSStefano Zampini ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 91*53022affSStefano Zampini } 92c4fb69feSStefano Zampini ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr); 93c4fb69feSStefano Zampini 94c4fb69feSStefano Zampini if (lda) { 95c4fb69feSStefano Zampini ierr = PetscMalloc1(N*(n+lda),&Adata);CHKERRQ(ierr); 96c4fb69feSStefano Zampini } 97c4fb69feSStefano Zampini ierr = MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);CHKERRQ(ierr); 98c4fb69feSStefano Zampini ierr = MatDenseSetLDA(A,n+lda);CHKERRQ(ierr); 99c4fb69feSStefano Zampini 100*53022affSStefano Zampini /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs 101*53022affSStefano Zampini The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case 102*53022affSStefano Zampini a kernel construction is requested */ 103*53022affSStefano Zampini ierr = PetscRandomCreate(PETSC_COMM_WORLD,&r);CHKERRQ(ierr); 104*53022affSStefano Zampini ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr); 105*53022affSStefano Zampini ierr = PetscRandomSetSeed(r,123456);CHKERRQ(ierr); 106*53022affSStefano Zampini ierr = PetscRandomSeed(r);CHKERRQ(ierr); 107*53022affSStefano Zampini ierr = PetscMalloc1(N*dim,&coords);CHKERRQ(ierr); 108*53022affSStefano Zampini ierr = PetscRandomGetValuesReal(r,N*dim,coords);CHKERRQ(ierr); 109*53022affSStefano Zampini ierr = PetscRandomDestroy(&r);CHKERRQ(ierr); 110*53022affSStefano Zampini 111c4fb69feSStefano Zampini if (kernel || !randommat) { 112*53022affSStefano Zampini MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm; 113c4fb69feSStefano Zampini PetscInt ist,ien; 114c4fb69feSStefano Zampini 115c4fb69feSStefano Zampini ierr = MatGetOwnershipRange(A,&ist,&ien);CHKERRQ(ierr); 116c4fb69feSStefano Zampini for (i = ist; i < ien; i++) { 117c4fb69feSStefano Zampini for (j = 0; j < N; j++) { 118*53022affSStefano Zampini ierr = MatSetValue(A,i,j,(*k)(dim,coords + i*dim,coords + j*dim,NULL),INSERT_VALUES);CHKERRQ(ierr); 119c4fb69feSStefano Zampini } 120c4fb69feSStefano Zampini } 121c4fb69feSStefano Zampini ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 122c4fb69feSStefano Zampini ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 123c4fb69feSStefano Zampini if (kernel) { 124*53022affSStefano Zampini ierr = MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords + ist*dim,PETSC_TRUE,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr); 125c4fb69feSStefano Zampini } else { 126*53022affSStefano Zampini ierr = MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr); 127c4fb69feSStefano Zampini } 128c4fb69feSStefano Zampini } else { 129*53022affSStefano Zampini PetscInt ist; 130*53022affSStefano Zampini 131*53022affSStefano Zampini ierr = MatGetOwnershipRange(A,&ist,NULL);CHKERRQ(ierr); 132*53022affSStefano Zampini ierr = MatSetRandom(A,NULL);CHKERRQ(ierr); 133c4fb69feSStefano Zampini if (Asymm) { 134c4fb69feSStefano Zampini ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 135c4fb69feSStefano Zampini ierr = MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 136c4fb69feSStefano Zampini ierr = MatDestroy(&B);CHKERRQ(ierr); 137c4fb69feSStefano Zampini ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 138c4fb69feSStefano Zampini } 139*53022affSStefano Zampini ierr = MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr); 140c4fb69feSStefano Zampini } 141*53022affSStefano Zampini ierr = PetscFree(coords);CHKERRQ(ierr); 142c4fb69feSStefano Zampini if (agpu) { 143c4fb69feSStefano Zampini ierr = MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 144c4fb69feSStefano Zampini } 145c4fb69feSStefano Zampini ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr); 146c4fb69feSStefano Zampini 147c4fb69feSStefano Zampini ierr = MatSetOption(B,MAT_SYMMETRIC,symm);CHKERRQ(ierr); 148c4fb69feSStefano Zampini 149c4fb69feSStefano Zampini /* assemble the H-matrix */ 150c4fb69feSStefano Zampini ierr = MatBindToCPU(B,(PetscBool)!bgpu);CHKERRQ(ierr); 151c4fb69feSStefano Zampini ierr = MatSetFromOptions(B);CHKERRQ(ierr); 152c4fb69feSStefano Zampini ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 153c4fb69feSStefano Zampini ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 154c4fb69feSStefano Zampini ierr = MatViewFromOptions(B,NULL,"-B_view");CHKERRQ(ierr); 155c4fb69feSStefano Zampini 156c4fb69feSStefano Zampini /* Test MatScale */ 157c4fb69feSStefano Zampini ierr = MatScale(A,scale);CHKERRQ(ierr); 158c4fb69feSStefano Zampini ierr = MatScale(B,scale);CHKERRQ(ierr); 159c4fb69feSStefano Zampini 160c4fb69feSStefano Zampini /* Test MatMult */ 161c4fb69feSStefano Zampini ierr = MatCreateVecs(A,&Ax,&Ay);CHKERRQ(ierr); 162c4fb69feSStefano Zampini ierr = MatCreateVecs(B,&Bx,&By);CHKERRQ(ierr); 163*53022affSStefano Zampini ierr = VecSetRandom(Ax,NULL);CHKERRQ(ierr); 164c4fb69feSStefano Zampini ierr = VecCopy(Ax,Bx);CHKERRQ(ierr); 165c4fb69feSStefano Zampini ierr = MatMult(A,Ax,Ay);CHKERRQ(ierr); 166c4fb69feSStefano Zampini ierr = MatMult(B,Bx,By);CHKERRQ(ierr); 167c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr); 168c4fb69feSStefano Zampini ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr); 169c4fb69feSStefano Zampini ierr = VecNorm(Ay,NORM_INFINITY,&nX);CHKERRQ(ierr); 170c4fb69feSStefano Zampini ierr = VecAXPY(Ay,-1.0,By);CHKERRQ(ierr); 171c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr); 172c4fb69feSStefano Zampini ierr = VecNorm(Ay,NORM_INFINITY,&err);CHKERRQ(ierr); 173c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);CHKERRQ(ierr); 174c4fb69feSStefano Zampini ierr = VecScale(By,-1.0);CHKERRQ(ierr); 175c4fb69feSStefano Zampini ierr = MatMultAdd(B,Bx,By,By);CHKERRQ(ierr); 176c4fb69feSStefano Zampini ierr = VecNorm(By,NORM_INFINITY,&err);CHKERRQ(ierr); 177c4fb69feSStefano Zampini ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr); 178c4fb69feSStefano Zampini if (err > PETSC_SMALL) { 179c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);CHKERRQ(ierr); 180c4fb69feSStefano Zampini } 181c4fb69feSStefano Zampini 182c4fb69feSStefano Zampini /* Test MatNorm */ 183c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr); 184c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr); 185c4fb69feSStefano Zampini norms[2] = -1.; /* NORM_2 not supported */ 186c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr); 187c4fb69feSStefano Zampini ierr = MatGetOperation(A,MATOP_NORM,&Anormfunc);CHKERRQ(ierr); 188c4fb69feSStefano Zampini ierr = MatGetOperation(B,MATOP_NORM,&approxnormfunc);CHKERRQ(ierr); 189c4fb69feSStefano Zampini ierr = MatSetOperation(A,MATOP_NORM,approxnormfunc);CHKERRQ(ierr); 190c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr); 191c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr); 192c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_2,&norms[2]);CHKERRQ(ierr); 193c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr); 194c4fb69feSStefano Zampini if (testnorm) { 195c4fb69feSStefano Zampini ierr = MatNorm(B,NORM_INFINITY,&norms[0]);CHKERRQ(ierr); 196c4fb69feSStefano Zampini ierr = MatNorm(B,NORM_1,&norms[1]);CHKERRQ(ierr); 197c4fb69feSStefano Zampini ierr = MatNorm(B,NORM_2,&norms[2]);CHKERRQ(ierr); 198c4fb69feSStefano Zampini } else { 199c4fb69feSStefano Zampini norms[0] = -1.; 200c4fb69feSStefano Zampini norms[1] = -1.; 201c4fb69feSStefano Zampini norms[2] = -1.; 202c4fb69feSStefano Zampini } 203c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr); 204c4fb69feSStefano Zampini ierr = MatSetOperation(A,MATOP_NORM,Anormfunc);CHKERRQ(ierr); 205c4fb69feSStefano Zampini 206c4fb69feSStefano Zampini /* Test MatDuplicate */ 207c4fb69feSStefano Zampini ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr); 208*53022affSStefano Zampini ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr); 209c4fb69feSStefano Zampini ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr); 210c4fb69feSStefano Zampini if (!flg) { 211c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");CHKERRQ(ierr); 212c4fb69feSStefano Zampini } 213c4fb69feSStefano Zampini if (testtrans) { 214c4fb69feSStefano Zampini ierr = MatMultTransposeEqual(B,D,10,&flg);CHKERRQ(ierr); 215c4fb69feSStefano Zampini if (!flg) { 216c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranpose error after MatDuplicate\n");CHKERRQ(ierr); 217c4fb69feSStefano Zampini } 218c4fb69feSStefano Zampini } 219c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 220c4fb69feSStefano Zampini 221c4fb69feSStefano Zampini if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */ 222*53022affSStefano Zampini ierr = VecSetRandom(Ay,NULL);CHKERRQ(ierr); 223c4fb69feSStefano Zampini ierr = VecCopy(Ay,By);CHKERRQ(ierr); 224c4fb69feSStefano Zampini ierr = MatMultTranspose(A,Ay,Ax);CHKERRQ(ierr); 225c4fb69feSStefano Zampini ierr = MatMultTranspose(B,By,Bx);CHKERRQ(ierr); 226c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr); 227c4fb69feSStefano Zampini ierr = VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");CHKERRQ(ierr); 228c4fb69feSStefano Zampini ierr = VecNorm(Ax,NORM_INFINITY,&nX);CHKERRQ(ierr); 229c4fb69feSStefano Zampini ierr = VecAXPY(Ax,-1.0,Bx);CHKERRQ(ierr); 230c4fb69feSStefano Zampini ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr); 231c4fb69feSStefano Zampini ierr = VecNorm(Ax,NORM_INFINITY,&err);CHKERRQ(ierr); 232c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);CHKERRQ(ierr); 233c4fb69feSStefano Zampini ierr = VecScale(Bx,-1.0);CHKERRQ(ierr); 234c4fb69feSStefano Zampini ierr = MatMultTransposeAdd(B,By,Bx,Bx);CHKERRQ(ierr); 235c4fb69feSStefano Zampini ierr = VecNorm(Bx,NORM_INFINITY,&err);CHKERRQ(ierr); 236c4fb69feSStefano Zampini if (err > PETSC_SMALL) { 237c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);CHKERRQ(ierr); 238c4fb69feSStefano Zampini } 239c4fb69feSStefano Zampini } 240c4fb69feSStefano Zampini ierr = VecDestroy(&Ax);CHKERRQ(ierr); 241c4fb69feSStefano Zampini ierr = VecDestroy(&Ay);CHKERRQ(ierr); 242c4fb69feSStefano Zampini ierr = VecDestroy(&Bx);CHKERRQ(ierr); 243c4fb69feSStefano Zampini ierr = VecDestroy(&By);CHKERRQ(ierr); 244c4fb69feSStefano Zampini 245c4fb69feSStefano Zampini /* Test MatMatMult */ 246c4fb69feSStefano Zampini if (ldc) { 247c4fb69feSStefano Zampini ierr = PetscMalloc1(nrhs*(n+ldc),&Cdata);CHKERRQ(ierr); 248c4fb69feSStefano Zampini } 249c4fb69feSStefano Zampini ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);CHKERRQ(ierr); 250c4fb69feSStefano Zampini ierr = MatDenseSetLDA(C,n+ldc);CHKERRQ(ierr); 251*53022affSStefano Zampini ierr = MatSetRandom(C,NULL);CHKERRQ(ierr); 252c4fb69feSStefano Zampini if (cgpu) { 253c4fb69feSStefano Zampini ierr = MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr); 254c4fb69feSStefano Zampini } 255c4fb69feSStefano Zampini for (nt = 0; nt < ntrials; nt++) { 256c4fb69feSStefano Zampini ierr = MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr); 257c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-bc_view");CHKERRQ(ierr); 258c4fb69feSStefano Zampini ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr); 259c4fb69feSStefano Zampini if (flg) { 260c4fb69feSStefano Zampini ierr = MatCreateVecs(B,&x,&y);CHKERRQ(ierr); 261c4fb69feSStefano Zampini ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr); 262c4fb69feSStefano Zampini for (i = 0; i < nrhs; i++) { 263c4fb69feSStefano Zampini ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr); 264c4fb69feSStefano Zampini ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr); 265c4fb69feSStefano Zampini ierr = MatMult(B,x,y);CHKERRQ(ierr); 266c4fb69feSStefano Zampini ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr); 267c4fb69feSStefano Zampini ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr); 268c4fb69feSStefano Zampini if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err);CHKERRQ(ierr); } 269c4fb69feSStefano Zampini } 270c4fb69feSStefano Zampini ierr = VecDestroy(&y);CHKERRQ(ierr); 271c4fb69feSStefano Zampini ierr = VecDestroy(&x);CHKERRQ(ierr); 272c4fb69feSStefano Zampini ierr = VecDestroy(&v);CHKERRQ(ierr); 273c4fb69feSStefano Zampini } 274c4fb69feSStefano Zampini } 275c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 276c4fb69feSStefano Zampini 277c4fb69feSStefano Zampini /* Test MatTransposeMatMult */ 278c4fb69feSStefano Zampini if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */ 279c4fb69feSStefano Zampini for (nt = 0; nt < ntrials; nt++) { 280c4fb69feSStefano Zampini ierr = MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr); 281c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-btc_view");CHKERRQ(ierr); 282c4fb69feSStefano Zampini ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr); 283c4fb69feSStefano Zampini if (flg) { 284c4fb69feSStefano Zampini ierr = MatCreateVecs(B,&y,&x);CHKERRQ(ierr); 285c4fb69feSStefano Zampini ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr); 286c4fb69feSStefano Zampini for (i = 0; i < nrhs; i++) { 287c4fb69feSStefano Zampini ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr); 288c4fb69feSStefano Zampini ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr); 289c4fb69feSStefano Zampini ierr = MatMultTranspose(B,x,y);CHKERRQ(ierr); 290c4fb69feSStefano Zampini ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr); 291c4fb69feSStefano Zampini ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr); 292c4fb69feSStefano Zampini if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err);CHKERRQ(ierr); } 293c4fb69feSStefano Zampini } 294c4fb69feSStefano Zampini ierr = VecDestroy(&y);CHKERRQ(ierr); 295c4fb69feSStefano Zampini ierr = VecDestroy(&x);CHKERRQ(ierr); 296c4fb69feSStefano Zampini ierr = VecDestroy(&v);CHKERRQ(ierr); 297c4fb69feSStefano Zampini } 298c4fb69feSStefano Zampini } 299c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 300c4fb69feSStefano Zampini } 301c4fb69feSStefano Zampini 302*53022affSStefano Zampini if (testorthog) { 303*53022affSStefano Zampini ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr); 304*53022affSStefano Zampini ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr); 305*53022affSStefano Zampini ierr = MatH2OpusOrthogonalize(D);CHKERRQ(ierr); 306*53022affSStefano Zampini ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr); 307*53022affSStefano Zampini if (!flg) { 308*53022affSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after basis ortogonalization\n");CHKERRQ(ierr); 309*53022affSStefano Zampini } 310*53022affSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 311*53022affSStefano Zampini } 312*53022affSStefano Zampini 313*53022affSStefano Zampini if (testcompress) { 314*53022affSStefano Zampini ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr); 315*53022affSStefano Zampini ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr); 316*53022affSStefano Zampini ierr = MatH2OpusCompress(D,PETSC_SMALL);CHKERRQ(ierr); 317*53022affSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 318*53022affSStefano Zampini } 319*53022affSStefano Zampini 320c4fb69feSStefano Zampini /* check explicit operator */ 321c4fb69feSStefano Zampini if (checkexpl) { 322c4fb69feSStefano Zampini Mat Be, Bet; 323c4fb69feSStefano Zampini 324c4fb69feSStefano Zampini ierr = MatComputeOperator(B,MATDENSE,&D);CHKERRQ(ierr); 325c4fb69feSStefano Zampini ierr = MatDuplicate(D,MAT_COPY_VALUES,&Be);CHKERRQ(ierr); 326c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr); 327c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-expl_view");CHKERRQ(ierr); 328c4fb69feSStefano Zampini ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 329c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-diff_view");CHKERRQ(ierr); 330c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr); 331c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr); 332c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr); 333c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 334c4fb69feSStefano Zampini 335c4fb69feSStefano Zampini if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */ 336c4fb69feSStefano Zampini ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 337c4fb69feSStefano Zampini ierr = MatComputeOperatorTranspose(B,MATDENSE,&D);CHKERRQ(ierr); 338c4fb69feSStefano Zampini ierr = MatDuplicate(D,MAT_COPY_VALUES,&Bet);CHKERRQ(ierr); 339c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr); 340c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-expl_trans_view");CHKERRQ(ierr); 341c4fb69feSStefano Zampini ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 342c4fb69feSStefano Zampini ierr = MatViewFromOptions(D,NULL,"-diff_trans_view");CHKERRQ(ierr); 343c4fb69feSStefano Zampini ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr); 344c4fb69feSStefano Zampini ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr); 345c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr); 346c4fb69feSStefano Zampini ierr = MatDestroy(&D);CHKERRQ(ierr); 347c4fb69feSStefano Zampini 348c4fb69feSStefano Zampini ierr = MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);CHKERRQ(ierr); 349c4fb69feSStefano Zampini ierr = MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 350c4fb69feSStefano Zampini ierr = MatViewFromOptions(Be,NULL,"-diff_expl_view");CHKERRQ(ierr); 351c4fb69feSStefano Zampini ierr = MatNorm(Be,NORM_FROBENIUS,&nB);CHKERRQ(ierr); 352c4fb69feSStefano Zampini ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);CHKERRQ(ierr); 353c4fb69feSStefano Zampini ierr = MatDestroy(&Be);CHKERRQ(ierr); 354c4fb69feSStefano Zampini ierr = MatDestroy(&Bet);CHKERRQ(ierr); 355c4fb69feSStefano Zampini } 356c4fb69feSStefano Zampini } 357c4fb69feSStefano Zampini ierr = MatDestroy(&A);CHKERRQ(ierr); 358c4fb69feSStefano Zampini ierr = MatDestroy(&B);CHKERRQ(ierr); 359c4fb69feSStefano Zampini ierr = MatDestroy(&C);CHKERRQ(ierr); 360c4fb69feSStefano Zampini ierr = PetscFree(Cdata);CHKERRQ(ierr); 361c4fb69feSStefano Zampini ierr = PetscFree(Adata);CHKERRQ(ierr); 362c4fb69feSStefano Zampini ierr = PetscFinalize(); 363c4fb69feSStefano Zampini return ierr; 364c4fb69feSStefano Zampini } 365c4fb69feSStefano Zampini 366c4fb69feSStefano Zampini /*TEST 367c4fb69feSStefano Zampini 368c4fb69feSStefano Zampini build: 369*53022affSStefano Zampini requires: h2opus 370c4fb69feSStefano Zampini 371c4fb69feSStefano Zampini #tests from kernel 372c4fb69feSStefano Zampini test: 373*53022affSStefano Zampini requires: h2opus 374c4fb69feSStefano Zampini nsize: 1 375c4fb69feSStefano Zampini suffix: 1 376c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0 377c4fb69feSStefano Zampini 378c4fb69feSStefano Zampini test: 379*53022affSStefano Zampini requires: h2opus 380c4fb69feSStefano Zampini nsize: 1 381c4fb69feSStefano Zampini suffix: 1_ld 382c4fb69feSStefano Zampini output_file: output/ex66_1.out 383c4fb69feSStefano Zampini args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0 384c4fb69feSStefano Zampini 385c4fb69feSStefano Zampini test: 386*53022affSStefano Zampini requires: h2opus cuda 387c4fb69feSStefano Zampini nsize: 1 388c4fb69feSStefano Zampini suffix: 1_cuda 389c4fb69feSStefano Zampini output_file: output/ex66_1.out 390c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1 391c4fb69feSStefano Zampini 392c4fb69feSStefano Zampini test: 393*53022affSStefano Zampini requires: h2opus cuda 394c4fb69feSStefano Zampini nsize: 1 395c4fb69feSStefano Zampini suffix: 1_cuda_ld 396c4fb69feSStefano Zampini output_file: output/ex66_1.out 397c4fb69feSStefano Zampini args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1 398c4fb69feSStefano Zampini 399c4fb69feSStefano Zampini test: 400*53022affSStefano Zampini requires: h2opus 401*53022affSStefano Zampini nsize: {{2 3}} 402c4fb69feSStefano Zampini suffix: 1_par 403*53022affSStefano Zampini args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0 404c4fb69feSStefano Zampini 405c4fb69feSStefano Zampini test: 406*53022affSStefano Zampini requires: h2opus cuda 407*53022affSStefano Zampini nsize: {{2 3}} 408c4fb69feSStefano Zampini suffix: 1_par_cuda 409*53022affSStefano Zampini args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}} 410c4fb69feSStefano Zampini output_file: output/ex66_1_par.out 411c4fb69feSStefano Zampini 412c4fb69feSStefano Zampini #tests from matrix sampling (parallel or unsymmetric not supported) 413c4fb69feSStefano Zampini test: 414*53022affSStefano Zampini requires: h2opus 415c4fb69feSStefano Zampini nsize: 1 416c4fb69feSStefano Zampini suffix: 2 417c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0 418c4fb69feSStefano Zampini 419c4fb69feSStefano Zampini test: 420*53022affSStefano Zampini requires: h2opus cuda 421c4fb69feSStefano Zampini nsize: 1 422c4fb69feSStefano Zampini suffix: 2_cuda 423c4fb69feSStefano Zampini output_file: output/ex66_2.out 424c4fb69feSStefano Zampini args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}} 425c4fb69feSStefano Zampini 426*53022affSStefano Zampini #tests view operation 427*53022affSStefano Zampini test: 428*53022affSStefano Zampini requires: h2opus !cuda 429*53022affSStefano Zampini filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]" 430*53022affSStefano Zampini nsize: {{1 2 3}} 431*53022affSStefano Zampini suffix: view 432*53022affSStefano Zampini args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2 433*53022affSStefano Zampini 434*53022affSStefano Zampini test: 435*53022affSStefano Zampini requires: h2opus cuda 436*53022affSStefano Zampini filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]" 437*53022affSStefano Zampini nsize: {{1 2 3}} 438*53022affSStefano Zampini suffix: view_cuda 439*53022affSStefano Zampini args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2 440*53022affSStefano Zampini 441c4fb69feSStefano Zampini TEST*/ 442