xref: /petsc/src/mat/tests/ex66.c (revision c4fb69fe26503779ffb88dc78e1c4a770da29217)
1*c4fb69feSStefano Zampini static char help[] = "Tests MATHARA\n\n";
2*c4fb69feSStefano Zampini 
3*c4fb69feSStefano Zampini #include <petscmat.h>
4*c4fb69feSStefano Zampini #include <petscsf.h>
5*c4fb69feSStefano Zampini 
6*c4fb69feSStefano Zampini static PetscScalar GenEntry_Symm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
7*c4fb69feSStefano Zampini {
8*c4fb69feSStefano Zampini     PetscInt  d;
9*c4fb69feSStefano Zampini     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
10*c4fb69feSStefano Zampini     PetscReal dist, diff = 0.0;
11*c4fb69feSStefano Zampini 
12*c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
13*c4fb69feSStefano Zampini     dist = PetscSqrtReal(diff);
14*c4fb69feSStefano Zampini     return PetscExpReal(-dist / clength);
15*c4fb69feSStefano Zampini }
16*c4fb69feSStefano Zampini 
17*c4fb69feSStefano Zampini static PetscScalar GenEntry_Unsymm(PetscInt sdim, PetscReal x[], PetscReal y[], void *ctx)
18*c4fb69feSStefano Zampini {
19*c4fb69feSStefano Zampini     PetscInt  d;
20*c4fb69feSStefano Zampini     PetscReal clength = sdim == 3 ? 0.2 : 0.1;
21*c4fb69feSStefano Zampini     PetscReal dist, diff = 0.0, nx = 0.0, ny = 0.0;
22*c4fb69feSStefano Zampini 
23*c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { nx += x[d]*x[d]; }
24*c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { ny += y[d]*y[d]; }
25*c4fb69feSStefano Zampini     for (d = 0; d < sdim; d++) { diff += (x[d] - y[d]) * (x[d] - y[d]); }
26*c4fb69feSStefano Zampini     dist = PetscSqrtReal(diff);
27*c4fb69feSStefano Zampini     return nx > ny ? PetscExpReal(-dist / clength) : PetscExpReal(-dist / clength) + 1.;
28*c4fb69feSStefano Zampini }
29*c4fb69feSStefano Zampini 
30*c4fb69feSStefano Zampini int main(int argc,char **argv)
31*c4fb69feSStefano Zampini {
32*c4fb69feSStefano Zampini   Mat            A,B,C,D;
33*c4fb69feSStefano Zampini   Vec            v,x,y,Ax,Ay,Bx,By;
34*c4fb69feSStefano Zampini   PetscRandom    r;
35*c4fb69feSStefano Zampini   PetscLayout    map;
36*c4fb69feSStefano Zampini   PetscScalar    *Adata = NULL, *Cdata = NULL, scale = 1.0;
37*c4fb69feSStefano Zampini   PetscReal      *coords,nA,nD,nB,err,nX,norms[3];
38*c4fb69feSStefano Zampini   PetscInt       N, n = 64, dim = 1, i, j, nrhs = 11, lda = 0, ldc = 0, nt, ntrials = 2;
39*c4fb69feSStefano Zampini   PetscMPIInt    size,rank;
40*c4fb69feSStefano Zampini   PetscBool      testlayout = PETSC_FALSE,flg,symm = PETSC_FALSE, Asymm = PETSC_TRUE, kernel = PETSC_TRUE;
41*c4fb69feSStefano Zampini   PetscBool      checkexpl = PETSC_FALSE, agpu = PETSC_FALSE, bgpu = PETSC_FALSE, cgpu = PETSC_FALSE;
42*c4fb69feSStefano Zampini   PetscBool      testtrans, testnorm, randommat = PETSC_TRUE;
43*c4fb69feSStefano Zampini   void           (*approxnormfunc)(void);
44*c4fb69feSStefano Zampini   void           (*Anormfunc)(void);
45*c4fb69feSStefano Zampini   PetscErrorCode ierr;
46*c4fb69feSStefano Zampini 
47*c4fb69feSStefano Zampini   PETSC_MPI_THREAD_REQUIRED = MPI_THREAD_MULTIPLE;
48*c4fb69feSStefano Zampini   ierr = PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
49*c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);CHKERRQ(ierr);
50*c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);CHKERRQ(ierr);
51*c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-dim",&dim,NULL);CHKERRQ(ierr);
52*c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-lda",&lda,NULL);CHKERRQ(ierr);
53*c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-ldc",&ldc,NULL);CHKERRQ(ierr);
54*c4fb69feSStefano Zampini   ierr = PetscOptionsGetInt(NULL,NULL,"-matmattrials",&ntrials,NULL);CHKERRQ(ierr);
55*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-randommat",&randommat,NULL);CHKERRQ(ierr);
56*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-testlayout",&testlayout,NULL);CHKERRQ(ierr);
57*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-Asymm",&Asymm,NULL);CHKERRQ(ierr);
58*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-symm",&symm,NULL);CHKERRQ(ierr);
59*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-kernel",&kernel,NULL);CHKERRQ(ierr);
60*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-checkexpl",&checkexpl,NULL);CHKERRQ(ierr);
61*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-agpu",&agpu,NULL);CHKERRQ(ierr);
62*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-bgpu",&bgpu,NULL);CHKERRQ(ierr);
63*c4fb69feSStefano Zampini   ierr = PetscOptionsGetBool(NULL,NULL,"-cgpu",&cgpu,NULL);CHKERRQ(ierr);
64*c4fb69feSStefano Zampini   ierr = PetscOptionsGetScalar(NULL,NULL,"-scale",&scale,NULL);CHKERRQ(ierr);
65*c4fb69feSStefano Zampini   if (!Asymm) symm = PETSC_FALSE;
66*c4fb69feSStefano Zampini 
67*c4fb69feSStefano Zampini   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
68*c4fb69feSStefano Zampini   /* MatMultTranspose for nonsymmetric matrices not implemented */
69*c4fb69feSStefano Zampini   testtrans = (PetscBool)(size == 1 || symm);
70*c4fb69feSStefano Zampini   testnorm = (PetscBool)(size == 1 || symm);
71*c4fb69feSStefano Zampini 
72*c4fb69feSStefano Zampini   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
73*c4fb69feSStefano Zampini   ierr = PetscLayoutCreate(PETSC_COMM_WORLD,&map);CHKERRQ(ierr);
74*c4fb69feSStefano Zampini   if (testlayout) {
75*c4fb69feSStefano Zampini     if (rank%2) n = PetscMax(2*n-5*rank,0);
76*c4fb69feSStefano Zampini     else n = 2*n+rank;
77*c4fb69feSStefano Zampini   }
78*c4fb69feSStefano Zampini   ierr = PetscLayoutSetLocalSize(map,n);CHKERRQ(ierr);
79*c4fb69feSStefano Zampini   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
80*c4fb69feSStefano Zampini   ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
81*c4fb69feSStefano Zampini   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
82*c4fb69feSStefano Zampini 
83*c4fb69feSStefano Zampini   ierr = PetscRandomCreate(PETSC_COMM_WORLD,&r);CHKERRQ(ierr);
84*c4fb69feSStefano Zampini   ierr = PetscRandomSetFromOptions(r);CHKERRQ(ierr);
85*c4fb69feSStefano Zampini   if (lda) {
86*c4fb69feSStefano Zampini     ierr = PetscMalloc1(N*(n+lda),&Adata);CHKERRQ(ierr);
87*c4fb69feSStefano Zampini   }
88*c4fb69feSStefano Zampini   ierr = MatCreateDense(PETSC_COMM_WORLD,n,n,N,N,Adata,&A);CHKERRQ(ierr);
89*c4fb69feSStefano Zampini   ierr = MatDenseSetLDA(A,n+lda);CHKERRQ(ierr);
90*c4fb69feSStefano Zampini   ierr = PetscMalloc1(n*dim,&coords);CHKERRQ(ierr);
91*c4fb69feSStefano Zampini   for (j = 0; j < n; j++) {
92*c4fb69feSStefano Zampini     for (i = 0; i < dim; i++) {
93*c4fb69feSStefano Zampini       PetscScalar a;
94*c4fb69feSStefano Zampini 
95*c4fb69feSStefano Zampini       ierr = PetscRandomGetValue(r,&a);CHKERRQ(ierr);
96*c4fb69feSStefano Zampini       coords[j*dim + i] = PetscRealPart(a);
97*c4fb69feSStefano Zampini     }
98*c4fb69feSStefano Zampini   }
99*c4fb69feSStefano Zampini   if (kernel || !randommat) {
100*c4fb69feSStefano Zampini     MatHaraKernel k = Asymm ? GenEntry_Symm : GenEntry_Unsymm;
101*c4fb69feSStefano Zampini     PetscInt      ist,ien;
102*c4fb69feSStefano Zampini     PetscReal     *gcoords;
103*c4fb69feSStefano Zampini 
104*c4fb69feSStefano Zampini     if (size > 1) { /* replicated coords so that we can populate the dense matrix */
105*c4fb69feSStefano Zampini       PetscSF      sf;
106*c4fb69feSStefano Zampini       MPI_Datatype dtype;
107*c4fb69feSStefano Zampini 
108*c4fb69feSStefano Zampini       ierr = MPI_Type_contiguous(dim,MPIU_REAL,&dtype);CHKERRQ(ierr);
109*c4fb69feSStefano Zampini       ierr = MPI_Type_commit(&dtype);CHKERRQ(ierr);
110*c4fb69feSStefano Zampini 
111*c4fb69feSStefano Zampini       ierr = PetscSFCreate(PETSC_COMM_WORLD,&sf);CHKERRQ(ierr);
112*c4fb69feSStefano Zampini       ierr = MatGetLayouts(A,&map,NULL);CHKERRQ(ierr);
113*c4fb69feSStefano Zampini       ierr = PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);CHKERRQ(ierr);
114*c4fb69feSStefano Zampini       ierr = PetscMalloc1(dim*N,&gcoords);CHKERRQ(ierr);
115*c4fb69feSStefano Zampini       ierr = PetscSFBcastBegin(sf,dtype,coords,gcoords);CHKERRQ(ierr);
116*c4fb69feSStefano Zampini       ierr = PetscSFBcastEnd(sf,dtype,coords,gcoords);CHKERRQ(ierr);
117*c4fb69feSStefano Zampini       ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
118*c4fb69feSStefano Zampini       ierr = MPI_Type_free(&dtype);CHKERRQ(ierr);
119*c4fb69feSStefano Zampini     } else gcoords = (PetscReal*)coords;
120*c4fb69feSStefano Zampini 
121*c4fb69feSStefano Zampini     ierr = MatGetOwnershipRange(A,&ist,&ien);CHKERRQ(ierr);
122*c4fb69feSStefano Zampini     for (i = ist; i < ien; i++) {
123*c4fb69feSStefano Zampini       for (j = 0; j < N; j++) {
124*c4fb69feSStefano Zampini         ierr = MatSetValue(A,i,j,(*k)(dim,gcoords + i*dim,gcoords + j*dim,NULL),INSERT_VALUES);CHKERRQ(ierr);
125*c4fb69feSStefano Zampini       }
126*c4fb69feSStefano Zampini     }
127*c4fb69feSStefano Zampini     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
128*c4fb69feSStefano Zampini     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
129*c4fb69feSStefano Zampini     if (kernel) {
130*c4fb69feSStefano Zampini       ierr = MatCreateHaraFromKernel(PETSC_COMM_WORLD,n,n,N,N,dim,coords,k,NULL,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
131*c4fb69feSStefano Zampini     } else {
132*c4fb69feSStefano Zampini       ierr = MatCreateHaraFromMat(A,dim,coords,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
133*c4fb69feSStefano Zampini     }
134*c4fb69feSStefano Zampini     if (gcoords != coords) { ierr = PetscFree(gcoords);CHKERRQ(ierr); }
135*c4fb69feSStefano Zampini   } else {
136*c4fb69feSStefano Zampini     ierr = MatSetRandom(A,r);CHKERRQ(ierr);
137*c4fb69feSStefano Zampini     if (Asymm) {
138*c4fb69feSStefano Zampini       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
139*c4fb69feSStefano Zampini       ierr = MatAXPY(A,1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
140*c4fb69feSStefano Zampini       ierr = MatDestroy(&B);CHKERRQ(ierr);
141*c4fb69feSStefano Zampini       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
142*c4fb69feSStefano Zampini     }
143*c4fb69feSStefano Zampini     ierr = MatCreateHaraFromMat(A,dim,coords,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,&B);CHKERRQ(ierr);
144*c4fb69feSStefano Zampini   }
145*c4fb69feSStefano Zampini   if (agpu) {
146*c4fb69feSStefano Zampini     ierr = MatConvert(A,MATDENSECUDA,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
147*c4fb69feSStefano Zampini   }
148*c4fb69feSStefano Zampini   ierr = MatViewFromOptions(A,NULL,"-A_view");CHKERRQ(ierr);
149*c4fb69feSStefano Zampini 
150*c4fb69feSStefano Zampini   ierr = MatSetOption(B,MAT_SYMMETRIC,symm);CHKERRQ(ierr);
151*c4fb69feSStefano Zampini   ierr = PetscFree(coords);CHKERRQ(ierr);
152*c4fb69feSStefano Zampini 
153*c4fb69feSStefano Zampini   /* assemble the H-matrix */
154*c4fb69feSStefano Zampini   ierr = MatBindToCPU(B,(PetscBool)!bgpu);CHKERRQ(ierr);
155*c4fb69feSStefano Zampini   ierr = MatSetFromOptions(B);CHKERRQ(ierr);
156*c4fb69feSStefano Zampini   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
157*c4fb69feSStefano Zampini   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
158*c4fb69feSStefano Zampini   ierr = MatViewFromOptions(B,NULL,"-B_view");CHKERRQ(ierr);
159*c4fb69feSStefano Zampini 
160*c4fb69feSStefano Zampini   /* Test MatScale */
161*c4fb69feSStefano Zampini   ierr = MatScale(A,scale);CHKERRQ(ierr);
162*c4fb69feSStefano Zampini   ierr = MatScale(B,scale);CHKERRQ(ierr);
163*c4fb69feSStefano Zampini 
164*c4fb69feSStefano Zampini   /* Test MatMult */
165*c4fb69feSStefano Zampini   ierr = MatCreateVecs(A,&Ax,&Ay);CHKERRQ(ierr);
166*c4fb69feSStefano Zampini   ierr = MatCreateVecs(B,&Bx,&By);CHKERRQ(ierr);
167*c4fb69feSStefano Zampini   ierr = VecSetRandom(Ax,r);CHKERRQ(ierr);
168*c4fb69feSStefano Zampini   ierr = VecCopy(Ax,Bx);CHKERRQ(ierr);
169*c4fb69feSStefano Zampini   ierr = MatMult(A,Ax,Ay);CHKERRQ(ierr);
170*c4fb69feSStefano Zampini   ierr = MatMult(B,Bx,By);CHKERRQ(ierr);
171*c4fb69feSStefano Zampini   ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr);
172*c4fb69feSStefano Zampini   ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr);
173*c4fb69feSStefano Zampini   ierr = VecNorm(Ay,NORM_INFINITY,&nX);CHKERRQ(ierr);
174*c4fb69feSStefano Zampini   ierr = VecAXPY(Ay,-1.0,By);CHKERRQ(ierr);
175*c4fb69feSStefano Zampini   ierr = VecViewFromOptions(Ay,NULL,"-mult_vec_view");CHKERRQ(ierr);
176*c4fb69feSStefano Zampini   ierr = VecNorm(Ay,NORM_INFINITY,&err);CHKERRQ(ierr);
177*c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult err %g\n",err/nX);CHKERRQ(ierr);
178*c4fb69feSStefano Zampini   ierr = VecScale(By,-1.0);CHKERRQ(ierr);
179*c4fb69feSStefano Zampini   ierr = MatMultAdd(B,Bx,By,By);CHKERRQ(ierr);
180*c4fb69feSStefano Zampini   ierr = VecNorm(By,NORM_INFINITY,&err);CHKERRQ(ierr);
181*c4fb69feSStefano Zampini   ierr = VecViewFromOptions(By,NULL,"-mult_vec_view");CHKERRQ(ierr);
182*c4fb69feSStefano Zampini   if (err > PETSC_SMALL) {
183*c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultAdd err %g\n",err);CHKERRQ(ierr);
184*c4fb69feSStefano Zampini   }
185*c4fb69feSStefano Zampini 
186*c4fb69feSStefano Zampini   /* Test MatNorm */
187*c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
188*c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr);
189*c4fb69feSStefano Zampini   norms[2] = -1.; /* NORM_2 not supported */
190*c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"A Matrix norms:        infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
191*c4fb69feSStefano Zampini   ierr = MatGetOperation(A,MATOP_NORM,&Anormfunc);CHKERRQ(ierr);
192*c4fb69feSStefano Zampini   ierr = MatGetOperation(B,MATOP_NORM,&approxnormfunc);CHKERRQ(ierr);
193*c4fb69feSStefano Zampini   ierr = MatSetOperation(A,MATOP_NORM,approxnormfunc);CHKERRQ(ierr);
194*c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
195*c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_1,&norms[1]);CHKERRQ(ierr);
196*c4fb69feSStefano Zampini   ierr = MatNorm(A,NORM_2,&norms[2]);CHKERRQ(ierr);
197*c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"A Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
198*c4fb69feSStefano Zampini   if (testnorm) {
199*c4fb69feSStefano Zampini     ierr = MatNorm(B,NORM_INFINITY,&norms[0]);CHKERRQ(ierr);
200*c4fb69feSStefano Zampini     ierr = MatNorm(B,NORM_1,&norms[1]);CHKERRQ(ierr);
201*c4fb69feSStefano Zampini     ierr = MatNorm(B,NORM_2,&norms[2]);CHKERRQ(ierr);
202*c4fb69feSStefano Zampini   } else {
203*c4fb69feSStefano Zampini     norms[0] = -1.;
204*c4fb69feSStefano Zampini     norms[1] = -1.;
205*c4fb69feSStefano Zampini     norms[2] = -1.;
206*c4fb69feSStefano Zampini   }
207*c4fb69feSStefano Zampini   ierr = PetscPrintf(PETSC_COMM_WORLD,"B Approx Matrix norms: infty=%g, norm_1=%g, norm_2=%g\n",(double)norms[0],(double)norms[1],(double)norms[2]);CHKERRQ(ierr);
208*c4fb69feSStefano Zampini   ierr = MatSetOperation(A,MATOP_NORM,Anormfunc);CHKERRQ(ierr);
209*c4fb69feSStefano Zampini 
210*c4fb69feSStefano Zampini   /* Test MatDuplicate */
211*c4fb69feSStefano Zampini   ierr = MatDuplicate(B,MAT_COPY_VALUES,&D);CHKERRQ(ierr);
212*c4fb69feSStefano Zampini   ierr = MatMultEqual(B,D,10,&flg);CHKERRQ(ierr);
213*c4fb69feSStefano Zampini   if (!flg) {
214*c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMult error after MatDuplicate\n");CHKERRQ(ierr);
215*c4fb69feSStefano Zampini   }
216*c4fb69feSStefano Zampini   if (testtrans) {
217*c4fb69feSStefano Zampini     ierr = MatMultTransposeEqual(B,D,10,&flg);CHKERRQ(ierr);
218*c4fb69feSStefano Zampini     if (!flg) {
219*c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranpose error after MatDuplicate\n");CHKERRQ(ierr);
220*c4fb69feSStefano Zampini     }
221*c4fb69feSStefano Zampini   }
222*c4fb69feSStefano Zampini   ierr = MatDestroy(&D);CHKERRQ(ierr);
223*c4fb69feSStefano Zampini 
224*c4fb69feSStefano Zampini   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
225*c4fb69feSStefano Zampini     ierr = VecSetRandom(Ay,r);CHKERRQ(ierr);
226*c4fb69feSStefano Zampini     ierr = VecCopy(Ay,By);CHKERRQ(ierr);
227*c4fb69feSStefano Zampini     ierr = MatMultTranspose(A,Ay,Ax);CHKERRQ(ierr);
228*c4fb69feSStefano Zampini     ierr = MatMultTranspose(B,By,Bx);CHKERRQ(ierr);
229*c4fb69feSStefano Zampini     ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
230*c4fb69feSStefano Zampini     ierr = VecViewFromOptions(Bx,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
231*c4fb69feSStefano Zampini     ierr = VecNorm(Ax,NORM_INFINITY,&nX);CHKERRQ(ierr);
232*c4fb69feSStefano Zampini     ierr = VecAXPY(Ax,-1.0,Bx);CHKERRQ(ierr);
233*c4fb69feSStefano Zampini     ierr = VecViewFromOptions(Ax,NULL,"-multtrans_vec_view");CHKERRQ(ierr);
234*c4fb69feSStefano Zampini     ierr = VecNorm(Ax,NORM_INFINITY,&err);CHKERRQ(ierr);
235*c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTranspose err %g\n",err/nX);CHKERRQ(ierr);
236*c4fb69feSStefano Zampini     ierr = VecScale(Bx,-1.0);CHKERRQ(ierr);
237*c4fb69feSStefano Zampini     ierr = MatMultTransposeAdd(B,By,Bx,Bx);CHKERRQ(ierr);
238*c4fb69feSStefano Zampini     ierr = VecNorm(Bx,NORM_INFINITY,&err);CHKERRQ(ierr);
239*c4fb69feSStefano Zampini     if (err > PETSC_SMALL) {
240*c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMultTransposeAdd err %g\n",err);CHKERRQ(ierr);
241*c4fb69feSStefano Zampini     }
242*c4fb69feSStefano Zampini   }
243*c4fb69feSStefano Zampini   ierr = VecDestroy(&Ax);CHKERRQ(ierr);
244*c4fb69feSStefano Zampini   ierr = VecDestroy(&Ay);CHKERRQ(ierr);
245*c4fb69feSStefano Zampini   ierr = VecDestroy(&Bx);CHKERRQ(ierr);
246*c4fb69feSStefano Zampini   ierr = VecDestroy(&By);CHKERRQ(ierr);
247*c4fb69feSStefano Zampini 
248*c4fb69feSStefano Zampini   /* Test MatMatMult */
249*c4fb69feSStefano Zampini   if (ldc) {
250*c4fb69feSStefano Zampini     ierr = PetscMalloc1(nrhs*(n+ldc),&Cdata);CHKERRQ(ierr);
251*c4fb69feSStefano Zampini   }
252*c4fb69feSStefano Zampini   ierr = MatCreateDense(PETSC_COMM_WORLD,n,PETSC_DECIDE,N,nrhs,Cdata,&C);CHKERRQ(ierr);
253*c4fb69feSStefano Zampini   ierr = MatDenseSetLDA(C,n+ldc);CHKERRQ(ierr);
254*c4fb69feSStefano Zampini   ierr = MatSetRandom(C,r);CHKERRQ(ierr);
255*c4fb69feSStefano Zampini   if (cgpu) {
256*c4fb69feSStefano Zampini     ierr = MatConvert(C,MATDENSECUDA,MAT_INPLACE_MATRIX,&C);CHKERRQ(ierr);
257*c4fb69feSStefano Zampini   }
258*c4fb69feSStefano Zampini   for (nt = 0; nt < ntrials; nt++) {
259*c4fb69feSStefano Zampini     ierr = MatMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);
260*c4fb69feSStefano Zampini     ierr = MatViewFromOptions(D,NULL,"-bc_view");CHKERRQ(ierr);
261*c4fb69feSStefano Zampini     ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr);
262*c4fb69feSStefano Zampini     if (flg) {
263*c4fb69feSStefano Zampini       ierr = MatCreateVecs(B,&x,&y);CHKERRQ(ierr);
264*c4fb69feSStefano Zampini       ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr);
265*c4fb69feSStefano Zampini       for (i = 0; i < nrhs; i++) {
266*c4fb69feSStefano Zampini         ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr);
267*c4fb69feSStefano Zampini         ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr);
268*c4fb69feSStefano Zampini         ierr = MatMult(B,x,y);CHKERRQ(ierr);
269*c4fb69feSStefano Zampini         ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr);
270*c4fb69feSStefano Zampini         ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr);
271*c4fb69feSStefano Zampini         if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatMat err %D %g\n",i,err);CHKERRQ(ierr); }
272*c4fb69feSStefano Zampini       }
273*c4fb69feSStefano Zampini       ierr = VecDestroy(&y);CHKERRQ(ierr);
274*c4fb69feSStefano Zampini       ierr = VecDestroy(&x);CHKERRQ(ierr);
275*c4fb69feSStefano Zampini       ierr = VecDestroy(&v);CHKERRQ(ierr);
276*c4fb69feSStefano Zampini     }
277*c4fb69feSStefano Zampini   }
278*c4fb69feSStefano Zampini   ierr = MatDestroy(&D);CHKERRQ(ierr);
279*c4fb69feSStefano Zampini 
280*c4fb69feSStefano Zampini   /* Test MatTransposeMatMult */
281*c4fb69feSStefano Zampini   if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
282*c4fb69feSStefano Zampini     for (nt = 0; nt < ntrials; nt++) {
283*c4fb69feSStefano Zampini       ierr = MatTransposeMatMult(B,C,nt ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);CHKERRQ(ierr);
284*c4fb69feSStefano Zampini       ierr = MatViewFromOptions(D,NULL,"-btc_view");CHKERRQ(ierr);
285*c4fb69feSStefano Zampini       ierr = PetscObjectBaseTypeCompareAny((PetscObject)D,&flg,MATSEQDENSE,MATMPIDENSE,"");CHKERRQ(ierr);
286*c4fb69feSStefano Zampini       if (flg) {
287*c4fb69feSStefano Zampini         ierr = MatCreateVecs(B,&y,&x);CHKERRQ(ierr);
288*c4fb69feSStefano Zampini         ierr = MatCreateVecs(D,NULL,&v);CHKERRQ(ierr);
289*c4fb69feSStefano Zampini         for (i = 0; i < nrhs; i++) {
290*c4fb69feSStefano Zampini           ierr = MatGetColumnVector(D,v,i);CHKERRQ(ierr);
291*c4fb69feSStefano Zampini           ierr = MatGetColumnVector(C,x,i);CHKERRQ(ierr);
292*c4fb69feSStefano Zampini           ierr = MatMultTranspose(B,x,y);CHKERRQ(ierr);
293*c4fb69feSStefano Zampini           ierr = VecAXPY(y,-1.0,v);CHKERRQ(ierr);
294*c4fb69feSStefano Zampini           ierr = VecNorm(y,NORM_INFINITY,&err);CHKERRQ(ierr);
295*c4fb69feSStefano Zampini           if (err > PETSC_SMALL) { ierr = PetscPrintf(PETSC_COMM_WORLD,"MatTransMat err %D %g\n",i,err);CHKERRQ(ierr); }
296*c4fb69feSStefano Zampini         }
297*c4fb69feSStefano Zampini         ierr = VecDestroy(&y);CHKERRQ(ierr);
298*c4fb69feSStefano Zampini         ierr = VecDestroy(&x);CHKERRQ(ierr);
299*c4fb69feSStefano Zampini         ierr = VecDestroy(&v);CHKERRQ(ierr);
300*c4fb69feSStefano Zampini       }
301*c4fb69feSStefano Zampini     }
302*c4fb69feSStefano Zampini     ierr = MatDestroy(&D);CHKERRQ(ierr);
303*c4fb69feSStefano Zampini   }
304*c4fb69feSStefano Zampini 
305*c4fb69feSStefano Zampini   /* check explicit operator */
306*c4fb69feSStefano Zampini   if (checkexpl) {
307*c4fb69feSStefano Zampini     Mat Be, Bet;
308*c4fb69feSStefano Zampini 
309*c4fb69feSStefano Zampini     ierr = MatComputeOperator(B,MATDENSE,&D);CHKERRQ(ierr);
310*c4fb69feSStefano Zampini     ierr = MatDuplicate(D,MAT_COPY_VALUES,&Be);CHKERRQ(ierr);
311*c4fb69feSStefano Zampini     ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
312*c4fb69feSStefano Zampini     ierr = MatViewFromOptions(D,NULL,"-expl_view");CHKERRQ(ierr);
313*c4fb69feSStefano Zampini     ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
314*c4fb69feSStefano Zampini     ierr = MatViewFromOptions(D,NULL,"-diff_view");CHKERRQ(ierr);
315*c4fb69feSStefano Zampini     ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr);
316*c4fb69feSStefano Zampini     ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr);
317*c4fb69feSStefano Zampini     ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr);
318*c4fb69feSStefano Zampini     ierr = MatDestroy(&D);CHKERRQ(ierr);
319*c4fb69feSStefano Zampini 
320*c4fb69feSStefano Zampini     if (testtrans) { /* MatMultTranspose for nonsymmetric matrices not implemented */
321*c4fb69feSStefano Zampini       ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
322*c4fb69feSStefano Zampini       ierr = MatComputeOperatorTranspose(B,MATDENSE,&D);CHKERRQ(ierr);
323*c4fb69feSStefano Zampini       ierr = MatDuplicate(D,MAT_COPY_VALUES,&Bet);CHKERRQ(ierr);
324*c4fb69feSStefano Zampini       ierr = MatNorm(D,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
325*c4fb69feSStefano Zampini       ierr = MatViewFromOptions(D,NULL,"-expl_trans_view");CHKERRQ(ierr);
326*c4fb69feSStefano Zampini       ierr = MatAXPY(D,-1.0,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
327*c4fb69feSStefano Zampini       ierr = MatViewFromOptions(D,NULL,"-diff_trans_view");CHKERRQ(ierr);
328*c4fb69feSStefano Zampini       ierr = MatNorm(D,NORM_FROBENIUS,&nD);CHKERRQ(ierr);
329*c4fb69feSStefano Zampini       ierr = MatNorm(A,NORM_FROBENIUS,&nA);CHKERRQ(ierr);
330*c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error transpose %g (%g / %g, %g)\n",nD/nA,nD,nA,nB);CHKERRQ(ierr);
331*c4fb69feSStefano Zampini       ierr = MatDestroy(&D);CHKERRQ(ierr);
332*c4fb69feSStefano Zampini 
333*c4fb69feSStefano Zampini       ierr = MatTranspose(Bet,MAT_INPLACE_MATRIX,&Bet);CHKERRQ(ierr);
334*c4fb69feSStefano Zampini       ierr = MatAXPY(Be,-1.0,Bet,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
335*c4fb69feSStefano Zampini       ierr = MatViewFromOptions(Be,NULL,"-diff_expl_view");CHKERRQ(ierr);
336*c4fb69feSStefano Zampini       ierr = MatNorm(Be,NORM_FROBENIUS,&nB);CHKERRQ(ierr);
337*c4fb69feSStefano Zampini       ierr = PetscPrintf(PETSC_COMM_WORLD,"Approximation error B - (B^T)^T %g\n",nB);CHKERRQ(ierr);
338*c4fb69feSStefano Zampini       ierr = MatDestroy(&Be);CHKERRQ(ierr);
339*c4fb69feSStefano Zampini       ierr = MatDestroy(&Bet);CHKERRQ(ierr);
340*c4fb69feSStefano Zampini     }
341*c4fb69feSStefano Zampini   }
342*c4fb69feSStefano Zampini   ierr = MatDestroy(&A);CHKERRQ(ierr);
343*c4fb69feSStefano Zampini   ierr = MatDestroy(&B);CHKERRQ(ierr);
344*c4fb69feSStefano Zampini   ierr = MatDestroy(&C);CHKERRQ(ierr);
345*c4fb69feSStefano Zampini   ierr = PetscRandomDestroy(&r);CHKERRQ(ierr);
346*c4fb69feSStefano Zampini   ierr = PetscFree(Cdata);CHKERRQ(ierr);
347*c4fb69feSStefano Zampini   ierr = PetscFree(Adata);CHKERRQ(ierr);
348*c4fb69feSStefano Zampini   ierr = PetscFinalize();
349*c4fb69feSStefano Zampini   return ierr;
350*c4fb69feSStefano Zampini }
351*c4fb69feSStefano Zampini 
352*c4fb69feSStefano Zampini /*TEST
353*c4fb69feSStefano Zampini 
354*c4fb69feSStefano Zampini    build:
355*c4fb69feSStefano Zampini      requires: hara
356*c4fb69feSStefano Zampini 
357*c4fb69feSStefano Zampini #tests from kernel
358*c4fb69feSStefano Zampini    test:
359*c4fb69feSStefano Zampini      requires: hara
360*c4fb69feSStefano Zampini      nsize: 1
361*c4fb69feSStefano Zampini      suffix: 1
362*c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 0
363*c4fb69feSStefano Zampini 
364*c4fb69feSStefano Zampini    test:
365*c4fb69feSStefano Zampini      requires: hara
366*c4fb69feSStefano Zampini      nsize: 1
367*c4fb69feSStefano Zampini      suffix: 1_ld
368*c4fb69feSStefano Zampini      output_file: output/ex66_1.out
369*c4fb69feSStefano Zampini      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 0
370*c4fb69feSStefano Zampini 
371*c4fb69feSStefano Zampini    test:
372*c4fb69feSStefano Zampini      requires: hara cuda
373*c4fb69feSStefano Zampini      nsize: 1
374*c4fb69feSStefano Zampini      suffix: 1_cuda
375*c4fb69feSStefano Zampini      output_file: output/ex66_1.out
376*c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 1 -dim {{1 2 3}} -symm {{0 1}} -checkexpl -bgpu 1
377*c4fb69feSStefano Zampini 
378*c4fb69feSStefano Zampini    test:
379*c4fb69feSStefano Zampini      requires: hara cuda
380*c4fb69feSStefano Zampini      nsize: 1
381*c4fb69feSStefano Zampini      suffix: 1_cuda_ld
382*c4fb69feSStefano Zampini      output_file: output/ex66_1.out
383*c4fb69feSStefano Zampini      args: -n 33 -kernel 1 -dim 1 -lda 13 -ldc 11 -symm 0 -checkexpl -bgpu 1
384*c4fb69feSStefano Zampini 
385*c4fb69feSStefano Zampini    test:
386*c4fb69feSStefano Zampini      requires: hara define(PETSC_HAVE_MPI_INIT_THREAD)
387*c4fb69feSStefano Zampini      nsize: 2
388*c4fb69feSStefano Zampini      suffix: 1_par
389*c4fb69feSStefano Zampini      args: -n 32 -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu 0 -cgpu 0
390*c4fb69feSStefano Zampini 
391*c4fb69feSStefano Zampini    test:
392*c4fb69feSStefano Zampini      requires: hara cuda define(PETSC_HAVE_MPI_INIT_THREAD)
393*c4fb69feSStefano Zampini      nsize: 2
394*c4fb69feSStefano Zampini      suffix: 1_par_cuda
395*c4fb69feSStefano Zampini      args: -n 32 -kernel 1 -dim 1 -ldc 12 -testlayout {{0 1}} -bgpu {{0 1}} -cgpu {{0 1}}
396*c4fb69feSStefano Zampini      output_file: output/ex66_1_par.out
397*c4fb69feSStefano Zampini 
398*c4fb69feSStefano Zampini #tests from matrix sampling (parallel or unsymmetric not supported)
399*c4fb69feSStefano Zampini    test:
400*c4fb69feSStefano Zampini      requires: hara
401*c4fb69feSStefano Zampini      nsize: 1
402*c4fb69feSStefano Zampini      suffix: 2
403*c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu 0
404*c4fb69feSStefano Zampini 
405*c4fb69feSStefano Zampini    test:
406*c4fb69feSStefano Zampini      requires: hara cuda
407*c4fb69feSStefano Zampini      nsize: 1
408*c4fb69feSStefano Zampini      suffix: 2_cuda
409*c4fb69feSStefano Zampini      output_file: output/ex66_2.out
410*c4fb69feSStefano Zampini      args: -n {{17 33}} -kernel 0 -dim 2 -symm 1 -checkexpl -bgpu {{0 1}} -agpu {{0 1}}
411*c4fb69feSStefano Zampini 
412*c4fb69feSStefano Zampini TEST*/
413