xref: /petsc/src/binding/petsc4py/demo/regressor/test_regressor_synthetic.py (revision c12c126234ed623246a63bfa78c9f75a3aa00323)
1*34b254c5SRichard Tran Mills# Linear regression with synthetic data generation
2*34b254c5SRichard Tran Mills# ================================================
3*34b254c5SRichard Tran Mills#
4*34b254c5SRichard Tran Mills# Generate a synthetic data set using sklearn.datasets.make_regression() and
5*34b254c5SRichard Tran Mills# then solve it using the petsc4py interface to PetscRegressor.
6*34b254c5SRichard Tran Mills
7*34b254c5SRichard Tran Millsimport numpy as np
8*34b254c5SRichard Tran Mills# Needed for plotting
9*34b254c5SRichard Tran Millsimport matplotlib.colors
10*34b254c5SRichard Tran Millsimport matplotlib.pyplot as plt
11*34b254c5SRichard Tran Mills
12*34b254c5SRichard Tran Mills# Needed for generating classification, regression and clustering datasets
13*34b254c5SRichard Tran Millsimport sklearn.datasets as dt
14*34b254c5SRichard Tran Mills
15*34b254c5SRichard Tran Mills# Needed for evaluating the quality of the regression for the test data set
16*34b254c5SRichard Tran Millsfrom sklearn.metrics import mean_squared_error
17*34b254c5SRichard Tran Mills
18*34b254c5SRichard Tran Millsimport argparse
19*34b254c5SRichard Tran Millsparser = argparse.ArgumentParser('Test MLG using synthetic data')
20*34b254c5SRichard Tran Millsparser.add_argument('--nsample', type=int, default=10000)
21*34b254c5SRichard Tran Millsparser.add_argument('--nfeature', type=int, default=10)
22*34b254c5SRichard Tran Millsparser.add_argument('--add_noise', action='store_true')
23*34b254c5SRichard Tran Millsargs, unknown = parser.parse_known_args()
24*34b254c5SRichard Tran Mills
25*34b254c5SRichard Tran Millsimport sys
26*34b254c5SRichard Tran Millsimport petsc4py
27*34b254c5SRichard Tran Millssys.argv = [sys.argv[0]] + unknown
28*34b254c5SRichard Tran Millspetsc4py.init(sys.argv)
29*34b254c5SRichard Tran Millsfrom petsc4py import PETSc
30*34b254c5SRichard Tran Mills
31*34b254c5SRichard Tran Mills# Define the seed so that results can be reproduced
32*34b254c5SRichard Tran Millsseed = 11
33*34b254c5SRichard Tran Millsrand_state = 11
34*34b254c5SRichard Tran Mills
35*34b254c5SRichard Tran Mills# Define the color maps for plots
36*34b254c5SRichard Tran Millscolor_map = plt.cm.get_cmap('RdYlBu')
37*34b254c5SRichard Tran Millscolor_map_discrete = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red","cyan","magenta","blue"])
38*34b254c5SRichard Tran Mills
39*34b254c5SRichard Tran Millsdef petsc_regression_test(nsample, nfeature, noise=0, rand_state=11):
40*34b254c5SRichard Tran Mills  ntr = round(nsample*9/10)
41*34b254c5SRichard Tran Mills  nte = nsample - ntr
42*34b254c5SRichard Tran Mills  x,y = dt.make_regression(n_samples=nsample,
43*34b254c5SRichard Tran Mills                           n_features=nfeature,
44*34b254c5SRichard Tran Mills                           noise=noise,
45*34b254c5SRichard Tran Mills                           random_state=rand_state)
46*34b254c5SRichard Tran Mills  x_train, y_train = x[:ntr,], y[:ntr]
47*34b254c5SRichard Tran Mills  xte, yte = x[ntr:,], y[ntr:]
48*34b254c5SRichard Tran Mills  comm = PETSc.COMM_WORLD
49*34b254c5SRichard Tran Mills  rank = comm.getRank()
50*34b254c5SRichard Tran Mills  regressor = PETSc.Regressor().create(comm=comm)
51*34b254c5SRichard Tran Mills  regressor.setType(PETSc.Regressor.Type.LINEAR)
52*34b254c5SRichard Tran Mills  rows_ix = np.arange(ntr,dtype=np.int32)
53*34b254c5SRichard Tran Mills  cols_ix = np.arange(nfeature,dtype=np.int32)
54*34b254c5SRichard Tran Mills  X = PETSc.Mat().create(comm=comm)
55*34b254c5SRichard Tran Mills  X.setSizes((ntr,nfeature))
56*34b254c5SRichard Tran Mills  X.setFromOptions()
57*34b254c5SRichard Tran Mills  X.setUp()
58*34b254c5SRichard Tran Mills  y = PETSc.Vec().create(comm=comm)
59*34b254c5SRichard Tran Mills  y.setSizes(ntr)
60*34b254c5SRichard Tran Mills  y.setFromOptions()
61*34b254c5SRichard Tran Mills  if not rank :
62*34b254c5SRichard Tran Mills    X.setValues(rows_ix,cols_ix,x_train,addv=True)
63*34b254c5SRichard Tran Mills    y.setValues(rows_ix,y_train,addv=False)
64*34b254c5SRichard Tran Mills  X.assemblyBegin(X.AssemblyType.FINAL)
65*34b254c5SRichard Tran Mills  X.assemblyEnd(X.AssemblyType.FINAL)
66*34b254c5SRichard Tran Mills  y.assemblyBegin()
67*34b254c5SRichard Tran Mills  y.assemblyEnd()
68*34b254c5SRichard Tran Mills  regressor.fit(X,y)
69*34b254c5SRichard Tran Mills  rows_ix = np.arange(nte,dtype=np.int32)
70*34b254c5SRichard Tran Mills  X = PETSc.Mat().create(comm=comm)
71*34b254c5SRichard Tran Mills  X.setSizes((nte,nfeature))
72*34b254c5SRichard Tran Mills  X.setFromOptions()
73*34b254c5SRichard Tran Mills  X.setUp()
74*34b254c5SRichard Tran Mills  y = PETSc.Vec().create(comm=comm)
75*34b254c5SRichard Tran Mills  y.setSizes(nte)
76*34b254c5SRichard Tran Mills  y.setFromOptions()
77*34b254c5SRichard Tran Mills  if not rank :
78*34b254c5SRichard Tran Mills    X.setValues(rows_ix,cols_ix,xte,addv=True)
79*34b254c5SRichard Tran Mills    y.zeroEntries()
80*34b254c5SRichard Tran Mills  X.assemblyBegin(X.AssemblyType.FINAL)
81*34b254c5SRichard Tran Mills  X.assemblyEnd(X.AssemblyType.FINAL)
82*34b254c5SRichard Tran Mills  y.assemblyBegin()
83*34b254c5SRichard Tran Mills  y.assemblyEnd()
84*34b254c5SRichard Tran Mills  regressor.predict(X,y)
85*34b254c5SRichard Tran Mills  ypr = y.getArray()
86*34b254c5SRichard Tran Mills  error = mean_squared_error(ypr,yte)
87*34b254c5SRichard Tran Mills  print(f"Test MSE: {error:f}")
88*34b254c5SRichard Tran Mills  return xte,ypr
89*34b254c5SRichard Tran Mills
90*34b254c5SRichard Tran Millsxte,ypr = petsc_regression_test(args.nsample, args.nfeature, 0)
91*34b254c5SRichard Tran Mills
92*34b254c5SRichard Tran Millsif args.add_noise:
93*34b254c5SRichard Tran Mills  fig,ax = plt.subplots(nrows=2, ncols=3,figsize=(16,7))
94*34b254c5SRichard Tran Mills  plt_ind_list = np.arange(6)+231
95*34b254c5SRichard Tran Mills
96*34b254c5SRichard Tran Mills  for noise,plt_ind in zip([0,0.1,1,10,100,1000],plt_ind_list):
97*34b254c5SRichard Tran Mills    xte,ypr = petsc_regression_test(args.nsample, args.nfeature, noise, rand_state)
98*34b254c5SRichard Tran Mills
99*34b254c5SRichard Tran Mills    plt.subplot(plt_ind)
100*34b254c5SRichard Tran Mills    my_scatter_plot = plt.scatter(xte[:,0],
101*34b254c5SRichard Tran Mills                                  xte[:,1],
102*34b254c5SRichard Tran Mills                                  c=ypr,
103*34b254c5SRichard Tran Mills                                  vmin=min(ypr),
104*34b254c5SRichard Tran Mills                                  vmax=max(ypr),
105*34b254c5SRichard Tran Mills                                  s=35,
106*34b254c5SRichard Tran Mills                                  cmap=color_map)
107*34b254c5SRichard Tran Mills
108*34b254c5SRichard Tran Mills    plt.title('noise: '+str(noise))
109*34b254c5SRichard Tran Mills    plt.colorbar(my_scatter_plot)
110*34b254c5SRichard Tran Mills
111*34b254c5SRichard Tran Mills  fig.subplots_adjust(hspace=0.3,wspace=.3)
112*34b254c5SRichard Tran Mills  plt.suptitle('PETSc regression tests with different noise levels',fontsize=20)
113*34b254c5SRichard Tran Mills  plt.show(block=True)
114