xref: /petsc/src/mat/tests/ex66.c (revision 53022affac82b2fcec7b6432d0d3b2c8aa0487f8)
1*53022affSStefano Zampini static char help[] = "Tests MATH2OPUS\n\n";
2c4fb69feSStefano Zampini 
3c4fb69feSStefano Zampini #include <petscmat.h>
4c4fb69feSStefano Zampini #include <petscsf.h>
5c4fb69feSStefano Zampini 
6c4fb69feSStefano Zampini static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
7c4fb69feSStefano Zampini {
8c4fb69feSStefano Zampini     PetscInt  d;
9c4fb69feSStefano Zampini     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
10c4fb69feSStefano Zampini     PetscReal dist, diff = 0.0;
11c4fb69feSStefano Zampini 
12c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
13c4fb69feSStefano Zampini     dist = PetscSqrtReal(diff);
14c4fb69feSStefano Zampini     return PetscExpReal(-dist / clength);
15c4fb69feSStefano Zampini }
16c4fb69feSStefano Zampini 
17c4fb69feSStefano Zampini static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
18c4fb69feSStefano Zampini {
19c4fb69feSStefano Zampini     PetscInt  d;
20c4fb69feSStefano Zampini     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
21c4fb69feSStefano Zampini     PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
22c4fb69feSStefano Zampini 
23c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
24c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
25c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
26c4fb69feSStefano Zampini     dist = PetscSqrtReal(diff);
27c4fb69feSStefano Zampini     return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
28c4fb69feSStefano Zampini }
29c4fb69feSStefano Zampini 
30c4fb69feSStefano Zampini int main(int argc,char **argv)
31c4fb69feSStefano Zampini {
32c4fb69feSStefano Zampini   Mat            A,B,C,D;
33c4fb69feSStefano Zampini   Vec            v,x,y,Ax,Ay,Bx,By;
34c4fb69feSStefano Zampini   PetscRandom    r;
35c4fb69feSStefano Zampini   PetscLayout    map;
36c4fb69feSStefano Zampini   PetscScalar    *Adata = NULL, *Cdata = NULL, scale = 1.0;
37c4fb69feSStefano Zampini   PetscReal      *coords,nA,nD,nB,err,nX,norms[3];
38c4fb69feSStefano Zampini   PetscInt       N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2;
39c4fb69feSStefano Zampini   PetscMPIInt    size,rank;
40c4fb69feSStefano Zampini   PetscBool      testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
41*53022affSStefano Zampini   PetscBool      checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE, flgglob;
42*53022affSStefano Zampini   PetscBool      testtrans, testnorm, randommat = PETSC_TRUE, testorthog, testcompress;
43c4fb69feSStefano Zampini   void           (*approxnormfunc)(void);
44c4fb69feSStefano Zampini   void           (*Anormfunc)(void);
45c4fb69feSStefano Zampini   PetscErrorCode ierr;
46c4fb69feSStefano Zampini 
47*53022affSStefano Zampini #if defined(PETSC_HAVE_MPI_INIT_THREAD)
48c4fb69feSStefano Zampini   PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
49*53022affSStefano Zampini #endif
50c4fb69feSStefano Zampini   ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
51*53022affSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-ng",&N,&flgglob);CHKERRQ(ierr);
52c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
53c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr);
54c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);CHKERRQ(ierr);
55c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);CHKERRQ(ierr);
56c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);CHKERRQ(ierr);
57c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);CHKERRQ(ierr);
58c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);CHKERRQ(ierr);
59*53022affSStefano Zampini   if (!flgglob) { ierr = PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);CHKERRQ(ierr); }
60c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);CHKERRQ(ierr);
61c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr);
62c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);CHKERRQ(ierr);
63c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);CHKERRQ(ierr);
64c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);CHKERRQ(ierr);
65c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr);
66c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);CHKERRQ(ierr);
67c4fb69feSStefano Zampini   ierr = PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);CHKERRQ(ierr);
68c4fb69feSStefano Zampini   if (!Asymm) symm = PETSC_FALSE;
69c4fb69feSStefano Zampini 
70ffc4695bSBarry Smith   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr);
71*53022affSStefano Zampini   /* MatMultTranspose for nonsymmetric matrices in parallel not implemented */
72c4fb69feSStefano Zampini   testtrans = (PetscBool)(size == 1 || symm);
73c4fb69feSStefano Zampini   testnorm = (PetscBool)(size == 1 || symm);
74*53022affSStefano Zampini   testorthog = (PetscBool)(size == 1 || symm);
75*53022affSStefano Zampini   testcompress = (PetscBool)(size == 1 || symm);
76c4fb69feSStefano Zampini 
77ffc4695bSBarry Smith   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
78c4fb69feSStefano Zampini   ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr);
79c4fb69feSStefano Zampini   if (testlayout) {
80c4fb69feSStefano Zampini     if (rank%2) n = PetscMax(2*n-5*rank,0);
81c4fb69feSStefano Zampini     else n = 2*n+rank;
82c4fb69feSStefano Zampini   }
83*53022affSStefano Zampini   if (!flgglob) {
84c4fb69feSStefano Zampini     ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
85c4fb69feSStefano Zampini     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
86c4fb69feSStefano Zampini     ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
87*53022affSStefano Zampini   } else {
88*53022affSStefano Zampini     ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
89*53022affSStefano Zampini     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
90*53022affSStefano Zampini     ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
91*53022affSStefano Zampini   }
92c4fb69feSStefano Zampini   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
93c4fb69feSStefano Zampini 
94c4fb69feSStefano Zampini   if (lda) {
95c4fb69feSStefano Zampini     ierr = PetscMalloc1(N*(n+lda),&Adata);CHKERRQ(ierr);
96c4fb69feSStefano Zampini   }
97c4fb69feSStefano Zampini   ierr = MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);CHKERRQ(ierr);
98c4fb69feSStefano Zampini   ierr = MatDenseSetLDA(A,n+lda);CHKERRQ(ierr);
99c4fb69feSStefano Zampini 
100*53022affSStefano Zampini   /* Create random points; these are replicated in order to populate a dense matrix and to compare sequential and dense runs
101*53022affSStefano Zampini      The constructor for MATH2OPUS can take as input the distributed coordinates and replicates them internally in case
102*53022affSStefano Zampini      a kernel construction is requested */
103*53022affSStefano Zampini   ierr = PetscRandomCreate(PETSC_COMM_WORLD,&r);CHKERRQ(ierr);
104*53022affSStefano Zampini   ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr);
105*53022affSStefano Zampini   ierr = PetscRandomSetSeed(r,123456);CHKERRQ(ierr);
106*53022affSStefano Zampini   ierr = PetscRandomSeed(r);CHKERRQ(ierr);
107*53022affSStefano Zampini   ierr = PetscMalloc1(N*dim,&coords);CHKERRQ(ierr);
108*53022affSStefano Zampini   ierr = PetscRandomGetValuesReal(r,N*dim,coords);CHKERRQ(ierr);
109*53022affSStefano Zampini   ierr = PetscRandomDestroy(&r);CHKERRQ(ierr);
110*53022affSStefano Zampini 
111c4fb69feSStefano Zampini   if (kernel || !randommat) {
112*53022affSStefano Zampini     MatH2OpusKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
113c4fb69feSStefano Zampini     PetscInt        ist,ien;
114c4fb69feSStefano Zampini 
115c4fb69feSStefano Zampini     ierr = MatGetOwnershipRange(A,&ist,&ien);CHKERRQ(ierr);
116c4fb69feSStefano Zampini     for (i = ist; i < ien; i++) {
117c4fb69feSStefano Zampini       for (j = 0; j < N; j++) {
118*53022affSStefano Zampini         ierr = MatSetValue(A,i,j,(*k)(dim,coords + i*dim,coords + j*dim,NULL),INSERT_VALUES);CHKERRQ(ierr);
119c4fb69feSStefano Zampini       }
120c4fb69feSStefano Zampini     }
121c4fb69feSStefano Zampini     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
122c4fb69feSStefano Zampini     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123c4fb69feSStefano Zampini     if (kernel) {
124*53022affSStefano Zampini       ierr = MatCreateH2OpusFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords + ist*dim,PETSC_TRUE,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
125c4fb69feSStefano Zampini     } else {
126*53022affSStefano Zampini       ierr = MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
127c4fb69feSStefano Zampini     }
128c4fb69feSStefano Zampini   } else {
129*53022affSStefano Zampini     PetscInt ist;
130*53022affSStefano Zampini 
131*53022affSStefano Zampini     ierr = MatGetOwnershipRange(A,&ist,NULL);CHKERRQ(ierr);
132*53022affSStefano Zampini     ierr = MatSetRandom(A,NULL);CHKERRQ(ierr);
133c4fb69feSStefano Zampini     if (Asymm) {
134c4fb69feSStefano Zampini       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
135c4fb69feSStefano Zampini       ierr = MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
136c4fb69feSStefano Zampini       ierr = MatDestroy(&B);CHKERRQ(ierr);
137c4fb69feSStefano Zampini       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
138c4fb69feSStefano Zampini     }
139*53022affSStefano Zampini     ierr = MatCreateH2OpusFromMat(A,dim,coords + ist*dim,PETSC_TRUE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
140c4fb69feSStefano Zampini   }
141*53022affSStefano Zampini   ierr = PetscFree(coords);CHKERRQ(ierr);
142c4fb69feSStefano Zampini   if (agpu) {
143c4fb69feSStefano Zampini     ierr = MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
144c4fb69feSStefano Zampini   }
145c4fb69feSStefano Zampini   ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr);
146c4fb69feSStefano Zampini 
147c4fb69feSStefano Zampini   ierr = MatSetOption(B,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
148c4fb69feSStefano Zampini 
149c4fb69feSStefano Zampini   /* assemble the H-matrix */
150c4fb69feSStefano Zampini   ierr = MatBindToCPU(B,(PetscBool)!bgpu);CHKERRQ(ierr);
151c4fb69feSStefano Zampini   ierr = MatSetFromOptions(B);CHKERRQ(ierr);
152c4fb69feSStefano Zampini   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
153c4fb69feSStefano Zampini   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
154c4fb69feSStefano Zampini   ierr = MatViewFromOptions(B,NULL,"-B_view");CHKERRQ(ierr);
155c4fb69feSStefano Zampini 
156c4fb69feSStefano Zampini   /* Test MatScale */
157c4fb69feSStefano Zampini   ierr = MatScale(A,scale);CHKERRQ(ierr);
158c4fb69feSStefano Zampini   ierr = MatScale(B,scale);CHKERRQ(ierr);
159c4fb69feSStefano Zampini 
160c4fb69feSStefano Zampini   /* Test MatMult */
161c4fb69feSStefano Zampini   ierr = MatCreateVecs(A,&Ax,&Ay);CHKERRQ(ierr);
162c4fb69feSStefano Zampini   ierr = MatCreateVecs(B,&Bx,&By);CHKERRQ(ierr);
163*53022affSStefano Zampini   ierr = VecSetRandom(Ax,NULL);CHKERRQ(ierr);
164c4fb69feSStefano Zampini   ierr = VecCopy(Ax,Bx);CHKERRQ(ierr);
165c4fb69feSStefano Zampini   ierr = MatMult(A,Ax,Ay);CHKERRQ(ierr);
166c4fb69feSStefano Zampini   ierr = MatMult(B,Bx,By);CHKERRQ(ierr);
167c4fb69feSStefano Zampini   ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr);
168c4fb69feSStefano Zampini   ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr);
169c4fb69feSStefano Zampini   ierr = VecNorm(Ay,NORM_INFINITY,&nX);CHKERRQ(ierr);
170c4fb69feSStefano Zampini   ierr = VecAXPY(Ay,-1.0,By);CHKERRQ(ierr);
171c4fb69feSStefano Zampini   ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr);
172c4fb69feSStefano Zampini   ierr = VecNorm(Ay,NORM_INFINITY,&err);CHKERRQ(ierr);
173c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);CHKERRQ(ierr);
174c4fb69feSStefano Zampini   ierr = VecScale(By,-1.0);CHKERRQ(ierr);
175c4fb69feSStefano Zampini   ierr = MatMultAdd(B,Bx,By,By);CHKERRQ(ierr);
176c4fb69feSStefano Zampini   ierr = VecNorm(By,NORM_INFINITY,&err);CHKERRQ(ierr);
177c4fb69feSStefano Zampini   ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr);
178c4fb69feSStefano Zampini   if (err > PETSC_SMALL) {
179c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);CHKERRQ(ierr);
180c4fb69feSStefano Zampini   }
181c4fb69feSStefano Zampini 
182c4fb69feSStefano Zampini   /* Test MatNorm */
183c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
184c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr);
185c4fb69feSStefano Zampini   norms[2] = -1.; /* NORM_2 not supported */
186c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms:        infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
187c4fb69feSStefano Zampini   ierr = MatGetOperation(A,MATOP_NORM,&Anormfunc);CHKERRQ(ierr);
188c4fb69feSStefano Zampini   ierr = MatGetOperation(B,MATOP_NORM,&approxnormfunc);CHKERRQ(ierr);
189c4fb69feSStefano Zampini   ierr = MatSetOperation(A,MATOP_NORM,approxnormfunc);CHKERRQ(ierr);
190c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
191c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr);
192c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_2,&norms[2]);CHKERRQ(ierr);
193c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
194c4fb69feSStefano Zampini   if (testnorm) {
195c4fb69feSStefano Zampini     ierr = MatNorm(B,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
196c4fb69feSStefano Zampini     ierr = MatNorm(B,NORM_1,&norms[1]);CHKERRQ(ierr);
197c4fb69feSStefano Zampini     ierr = MatNorm(B,NORM_2,&norms[2]);CHKERRQ(ierr);
198c4fb69feSStefano Zampini   } else {
199c4fb69feSStefano Zampini     norms[0] = -1.;
200c4fb69feSStefano Zampini     norms[1] = -1.;
201c4fb69feSStefano Zampini     norms[2] = -1.;
202c4fb69feSStefano Zampini   }
203c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
204c4fb69feSStefano Zampini   ierr = MatSetOperation(A,MATOP_NORM,Anormfunc);CHKERRQ(ierr);
205c4fb69feSStefano Zampini 
206c4fb69feSStefano Zampini   /* Test MatDuplicate */
207c4fb69feSStefano Zampini   ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
208*53022affSStefano Zampini   ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
209c4fb69feSStefano Zampini   ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr);
210c4fb69feSStefano Zampini   if (!flg) {
211c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");CHKERRQ(ierr);
212c4fb69feSStefano Zampini   }
213c4fb69feSStefano Zampini   if (testtrans) {
214c4fb69feSStefano Zampini     ierr = MatMultTransposeEqual(B,D,10,&flg);CHKERRQ(ierr);
215c4fb69feSStefano Zampini     if (!flg) {
216c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranpose error after MatDuplicate\n");CHKERRQ(ierr);
217c4fb69feSStefano Zampini     }
218c4fb69feSStefano Zampini   }
219c4fb69feSStefano Zampini   ierr = MatDestroy(&D);CHKERRQ(ierr);
220c4fb69feSStefano Zampini 
221c4fb69feSStefano Zampini   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
222*53022affSStefano Zampini     ierr = VecSetRandom(Ay,NULL);CHKERRQ(ierr);
223c4fb69feSStefano Zampini     ierr = VecCopy(Ay,By);CHKERRQ(ierr);
224c4fb69feSStefano Zampini     ierr = MatMultTranspose(A,Ay,Ax);CHKERRQ(ierr);
225c4fb69feSStefano Zampini     ierr = MatMultTranspose(B,By,Bx);CHKERRQ(ierr);
226c4fb69feSStefano Zampini     ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
227c4fb69feSStefano Zampini     ierr = VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
228c4fb69feSStefano Zampini     ierr = VecNorm(Ax,NORM_INFINITY,&nX);CHKERRQ(ierr);
229c4fb69feSStefano Zampini     ierr = VecAXPY(Ax,-1.0,Bx);CHKERRQ(ierr);
230c4fb69feSStefano Zampini     ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
231c4fb69feSStefano Zampini     ierr = VecNorm(Ax,NORM_INFINITY,&err);CHKERRQ(ierr);
232c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);CHKERRQ(ierr);
233c4fb69feSStefano Zampini     ierr = VecScale(Bx,-1.0);CHKERRQ(ierr);
234c4fb69feSStefano Zampini     ierr = MatMultTransposeAdd(B,By,Bx,Bx);CHKERRQ(ierr);
235c4fb69feSStefano Zampini     ierr = VecNorm(Bx,NORM_INFINITY,&err);CHKERRQ(ierr);
236c4fb69feSStefano Zampini     if (err > PETSC_SMALL) {
237c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);CHKERRQ(ierr);
238c4fb69feSStefano Zampini     }
239c4fb69feSStefano Zampini   }
240c4fb69feSStefano Zampini   ierr = VecDestroy(&Ax);CHKERRQ(ierr);
241c4fb69feSStefano Zampini   ierr = VecDestroy(&Ay);CHKERRQ(ierr);
242c4fb69feSStefano Zampini   ierr = VecDestroy(&Bx);CHKERRQ(ierr);
243c4fb69feSStefano Zampini   ierr = VecDestroy(&By);CHKERRQ(ierr);
244c4fb69feSStefano Zampini 
245c4fb69feSStefano Zampini   /* Test MatMatMult */
246c4fb69feSStefano Zampini   if (ldc) {
247c4fb69feSStefano Zampini     ierr = PetscMalloc1(nrhs*(n+ldc),&Cdata);CHKERRQ(ierr);
248c4fb69feSStefano Zampini   }
249c4fb69feSStefano Zampini   ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);CHKERRQ(ierr);
250c4fb69feSStefano Zampini   ierr = MatDenseSetLDA(C,n+ldc);CHKERRQ(ierr);
251*53022affSStefano Zampini   ierr = MatSetRandom(C,NULL);CHKERRQ(ierr);
252c4fb69feSStefano Zampini   if (cgpu) {
253c4fb69feSStefano Zampini     ierr = MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr);
254c4fb69feSStefano Zampini   }
255c4fb69feSStefano Zampini   for (nt = 0; nt < ntrials; nt++) {
256c4fb69feSStefano Zampini     ierr = MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);
257c4fb69feSStefano Zampini     ierr = MatViewFromOptions(D,NULL,"-bc_view");CHKERRQ(ierr);
258c4fb69feSStefano Zampini     ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr);
259c4fb69feSStefano Zampini     if (flg) {
260c4fb69feSStefano Zampini       ierr = MatCreateVecs(B,&x,&y);CHKERRQ(ierr);
261c4fb69feSStefano Zampini       ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr);
262c4fb69feSStefano Zampini       for (i = 0; i < nrhs; i++) {
263c4fb69feSStefano Zampini         ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr);
264c4fb69feSStefano Zampini         ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr);
265c4fb69feSStefano Zampini         ierr = MatMult(B,x,y);CHKERRQ(ierr);
266c4fb69feSStefano Zampini         ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr);
267c4fb69feSStefano Zampini         ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr);
268c4fb69feSStefano Zampini         if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err);CHKERRQ(ierr); }
269c4fb69feSStefano Zampini       }
270c4fb69feSStefano Zampini       ierr = VecDestroy(&y);CHKERRQ(ierr);
271c4fb69feSStefano Zampini       ierr = VecDestroy(&x);CHKERRQ(ierr);
272c4fb69feSStefano Zampini       ierr = VecDestroy(&v);CHKERRQ(ierr);
273c4fb69feSStefano Zampini     }
274c4fb69feSStefano Zampini   }
275c4fb69feSStefano Zampini   ierr = MatDestroy(&D);CHKERRQ(ierr);
276c4fb69feSStefano Zampini 
277c4fb69feSStefano Zampini   /* Test MatTransposeMatMult */
278c4fb69feSStefano Zampini   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
279c4fb69feSStefano Zampini     for (nt = 0; nt < ntrials; nt++) {
280c4fb69feSStefano Zampini       ierr = MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);
281c4fb69feSStefano Zampini       ierr = MatViewFromOptions(D,NULL,"-btc_view");CHKERRQ(ierr);
282c4fb69feSStefano Zampini       ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr);
283c4fb69feSStefano Zampini       if (flg) {
284c4fb69feSStefano Zampini         ierr = MatCreateVecs(B,&y,&x);CHKERRQ(ierr);
285c4fb69feSStefano Zampini         ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr);
286c4fb69feSStefano Zampini         for (i = 0; i < nrhs; i++) {
287c4fb69feSStefano Zampini           ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr);
288c4fb69feSStefano Zampini           ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr);
289c4fb69feSStefano Zampini           ierr = MatMultTranspose(B,x,y);CHKERRQ(ierr);
290c4fb69feSStefano Zampini           ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr);
291c4fb69feSStefano Zampini           ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr);
292c4fb69feSStefano Zampini           if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err);CHKERRQ(ierr); }
293c4fb69feSStefano Zampini         }
294c4fb69feSStefano Zampini         ierr = VecDestroy(&y);CHKERRQ(ierr);
295c4fb69feSStefano Zampini         ierr = VecDestroy(&x);CHKERRQ(ierr);
296c4fb69feSStefano Zampini         ierr = VecDestroy(&v);CHKERRQ(ierr);
297c4fb69feSStefano Zampini       }
298c4fb69feSStefano Zampini     }
299c4fb69feSStefano Zampini     ierr = MatDestroy(&D);CHKERRQ(ierr);
300c4fb69feSStefano Zampini   }
301c4fb69feSStefano Zampini 
302*53022affSStefano Zampini   if (testorthog) {
303*53022affSStefano Zampini     ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
304*53022affSStefano Zampini     ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
305*53022affSStefano Zampini     ierr = MatH2OpusOrthogonalize(D);CHKERRQ(ierr);
306*53022affSStefano Zampini     ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr);
307*53022affSStefano Zampini     if (!flg) {
308*53022affSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after basis ortogonalization\n");CHKERRQ(ierr);
309*53022affSStefano Zampini     }
310*53022affSStefano Zampini     ierr = MatDestroy(&D);CHKERRQ(ierr);
311*53022affSStefano Zampini   }
312*53022affSStefano Zampini 
313*53022affSStefano Zampini   if (testcompress) {
314*53022affSStefano Zampini     ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
315*53022affSStefano Zampini     ierr = MatSetOption(D,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
316*53022affSStefano Zampini     ierr = MatH2OpusCompress(D,PETSC_SMALL);CHKERRQ(ierr);
317*53022affSStefano Zampini     ierr = MatDestroy(&D);CHKERRQ(ierr);
318*53022affSStefano Zampini   }
319*53022affSStefano Zampini 
320c4fb69feSStefano Zampini   /* check explicit operator */
321c4fb69feSStefano Zampini   if (checkexpl) {
322c4fb69feSStefano Zampini     Mat Be, Bet;
323c4fb69feSStefano Zampini 
324c4fb69feSStefano Zampini     ierr = MatComputeOperator(B,MATDENSE,&D);CHKERRQ(ierr);
325c4fb69feSStefano Zampini     ierr = MatDuplicate(D,MAT_COPY_VALUES,&Be);CHKERRQ(ierr);
326c4fb69feSStefano Zampini     ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
327c4fb69feSStefano Zampini     ierr = MatViewFromOptions(D,NULL,"-expl_view");CHKERRQ(ierr);
328c4fb69feSStefano Zampini     ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
329c4fb69feSStefano Zampini     ierr = MatViewFromOptions(D,NULL,"-diff_view");CHKERRQ(ierr);
330c4fb69feSStefano Zampini     ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr);
331c4fb69feSStefano Zampini     ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr);
332c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr);
333c4fb69feSStefano Zampini     ierr = MatDestroy(&D);CHKERRQ(ierr);
334c4fb69feSStefano Zampini 
335c4fb69feSStefano Zampini     if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
336c4fb69feSStefano Zampini       ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
337c4fb69feSStefano Zampini       ierr = MatComputeOperatorTranspose(B,MATDENSE,&D);CHKERRQ(ierr);
338c4fb69feSStefano Zampini       ierr = MatDuplicate(D,MAT_COPY_VALUES,&Bet);CHKERRQ(ierr);
339c4fb69feSStefano Zampini       ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
340c4fb69feSStefano Zampini       ierr = MatViewFromOptions(D,NULL,"-expl_trans_view");CHKERRQ(ierr);
341c4fb69feSStefano Zampini       ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
342c4fb69feSStefano Zampini       ierr = MatViewFromOptions(D,NULL,"-diff_trans_view");CHKERRQ(ierr);
343c4fb69feSStefano Zampini       ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr);
344c4fb69feSStefano Zampini       ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr);
345c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr);
346c4fb69feSStefano Zampini       ierr = MatDestroy(&D);CHKERRQ(ierr);
347c4fb69feSStefano Zampini 
348c4fb69feSStefano Zampini       ierr = MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);CHKERRQ(ierr);
349c4fb69feSStefano Zampini       ierr = MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
350c4fb69feSStefano Zampini       ierr = MatViewFromOptions(Be,NULL,"-diff_expl_view");CHKERRQ(ierr);
351c4fb69feSStefano Zampini       ierr = MatNorm(Be,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
352c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);CHKERRQ(ierr);
353c4fb69feSStefano Zampini       ierr = MatDestroy(&Be);CHKERRQ(ierr);
354c4fb69feSStefano Zampini       ierr = MatDestroy(&Bet);CHKERRQ(ierr);
355c4fb69feSStefano Zampini     }
356c4fb69feSStefano Zampini   }
357c4fb69feSStefano Zampini   ierr = MatDestroy(&A);CHKERRQ(ierr);
358c4fb69feSStefano Zampini   ierr = MatDestroy(&B);CHKERRQ(ierr);
359c4fb69feSStefano Zampini   ierr = MatDestroy(&C);CHKERRQ(ierr);
360c4fb69feSStefano Zampini   ierr = PetscFree(Cdata);CHKERRQ(ierr);
361c4fb69feSStefano Zampini   ierr = PetscFree(Adata);CHKERRQ(ierr);
362c4fb69feSStefano Zampini   ierr = PetscFinalize();
363c4fb69feSStefano Zampini   return ierr;
364c4fb69feSStefano Zampini }
365c4fb69feSStefano Zampini 
366c4fb69feSStefano Zampini /*TEST
367c4fb69feSStefano Zampini 
368c4fb69feSStefano Zampini    build:
369*53022affSStefano Zampini      requires: h2opus
370c4fb69feSStefano Zampini 
371c4fb69feSStefano Zampini #tests from kernel
372c4fb69feSStefano Zampini    test:
373*53022affSStefano Zampini      requires: h2opus
374c4fb69feSStefano Zampini      nsize: 1
375c4fb69feSStefano Zampini      suffix: 1
376c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
377c4fb69feSStefano Zampini 
378c4fb69feSStefano Zampini    test:
379*53022affSStefano Zampini      requires: h2opus
380c4fb69feSStefano Zampini      nsize: 1
381c4fb69feSStefano Zampini      suffix: 1_ld
382c4fb69feSStefano Zampini      output_file: output/ex66_1.out
383c4fb69feSStefano Zampini      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0
384c4fb69feSStefano Zampini 
385c4fb69feSStefano Zampini    test:
386*53022affSStefano Zampini      requires: h2opus cuda
387c4fb69feSStefano Zampini      nsize: 1
388c4fb69feSStefano Zampini      suffix: 1_cuda
389c4fb69feSStefano Zampini      output_file: output/ex66_1.out
390c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
391c4fb69feSStefano Zampini 
392c4fb69feSStefano Zampini    test:
393*53022affSStefano Zampini      requires: h2opus cuda
394c4fb69feSStefano Zampini      nsize: 1
395c4fb69feSStefano Zampini      suffix: 1_cuda_ld
396c4fb69feSStefano Zampini      output_file: output/ex66_1.out
397c4fb69feSStefano Zampini      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1
398c4fb69feSStefano Zampini 
399c4fb69feSStefano Zampini    test:
400*53022affSStefano Zampini      requires: h2opus
401*53022affSStefano Zampini      nsize: {{2 3}}
402c4fb69feSStefano Zampini      suffix: 1_par
403*53022affSStefano Zampini      args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
404c4fb69feSStefano Zampini 
405c4fb69feSStefano Zampini    test:
406*53022affSStefano Zampini      requires: h2opus cuda
407*53022affSStefano Zampini      nsize: {{2 3}}
408c4fb69feSStefano Zampini      suffix: 1_par_cuda
409*53022affSStefano Zampini      args: -n 64 -symm -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
410c4fb69feSStefano Zampini      output_file: output/ex66_1_par.out
411c4fb69feSStefano Zampini 
412c4fb69feSStefano Zampini #tests from matrix sampling (parallel or unsymmetric not supported)
413c4fb69feSStefano Zampini    test:
414*53022affSStefano Zampini      requires: h2opus
415c4fb69feSStefano Zampini      nsize: 1
416c4fb69feSStefano Zampini      suffix: 2
417c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
418c4fb69feSStefano Zampini 
419c4fb69feSStefano Zampini    test:
420*53022affSStefano Zampini      requires: h2opus cuda
421c4fb69feSStefano Zampini      nsize: 1
422c4fb69feSStefano Zampini      suffix: 2_cuda
423c4fb69feSStefano Zampini      output_file: output/ex66_2.out
424c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
425c4fb69feSStefano Zampini 
426*53022affSStefano Zampini #tests view operation
427*53022affSStefano Zampini    test:
428*53022affSStefano Zampini      requires: h2opus !cuda
429*53022affSStefano Zampini      filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
430*53022affSStefano Zampini      nsize: {{1 2 3}}
431*53022affSStefano Zampini      suffix: view
432*53022affSStefano Zampini      args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
433*53022affSStefano Zampini 
434*53022affSStefano Zampini    test:
435*53022affSStefano Zampini      requires: h2opus cuda
436*53022affSStefano Zampini      filter: grep -v "MPI processes" | grep -v "\[" | grep -v "\]"
437*53022affSStefano Zampini      nsize: {{1 2 3}}
438*53022affSStefano Zampini      suffix: view_cuda
439*53022affSStefano Zampini      args: -ng 64 -kernel 1 -dim 2 -symm 1 -checkexpl -bgpu -B_view -mat_h2opus_leafsize 17 -mat_h2opus_normsamples 13 -mat_h2opus_indexmap_view ::ascii_matlab -mat_approximate_norm_samples 2 -mat_h2opus_normsamples 2
440*53022affSStefano Zampini 
441c4fb69feSStefano Zampini TEST*/
442