39 #include <rlEpisode.hpp>
40 #include <rlException.hpp>
44 namespace mountain_car {
47 enum class Action: int {
/**
 * Number of available actions. The actions referenced elsewhere in this
 * file are Action::actionNone, Action::actionForward and
 * Action::actionBackward.
 */
constexpr int actionSize = 3;
58 : Any(std::string(
"Bad action performed : ")+comment) {}
64 : Any(std::string(
"Bad state found : ")+comment) {}
/** Lower bound of the position range (used for validity checks and as the left wall). */
inline static double minPosition() { return -1.200; }
/** Upper bound of the position range. */
inline static double maxPosition() { return 0.500; }
/** Lower bound of the speed range (speed is saturated to it). */
inline static double minSpeed() { return -0.070; }
/** Upper bound of the speed range (speed is saturated to it). */
inline static double maxSpeed() { return 0.070; }
75 inline static double goalPosition(
void) {
return maxPosition();}
/** Lowest speed accepted when the goal reward is granted. */
inline static double goalSpeed() { return 0.000; }
77 inline static double goalSpeedMargin(
void) {
return maxSpeed();}
/** Reward assigned when the goal condition is met. */
inline static double rewardGoal() { return 1.0; }
/** Default reward assigned at each time step. */
inline static double rewardStep() { return 0.0; }
83 template<
typename PARAM>
86 using param_type = PARAM;
88 double position,speed;
91 Phase(
const Phase& copy) : position(copy.position), speed(copy.speed) {}
92 Phase(
double p,
double s) : position(p), speed(s) {}
96 position = copy.position;
// Validates the phase against the bounds of param_type: the test below is
// true when the position or the speed lies outside its [min, max] interval.
// In that case a message reporting both values is assembled.
// NOTE(review): the statements following the message (presumably a throw of
// a bad-state exception) are not visible in this excerpt; confirm.
102 void check(
void)
const {
103 if( (position > param_type::maxPosition()) || (position < param_type::minPosition())
104 || (speed > param_type::maxSpeed()) || (speed < param_type::minSpeed()) ) {
105 std::ostringstream ostr;
106 ostr <<
"mountain_car::Phase::check : At position = " << position <<
" and speed = " << speed <<
".";
// Builds a Phase<PARAM> whose position and speed are each drawn from a
// std::uniform_real_distribution over the corresponding [min, max) range of
// param_type, using the generator `gen`.
// NOTE(review): the function signature line is not visible in this excerpt;
// `gen` is presumably a RANDOM_DEVICE reference parameter. Confirm.
111 template<
typename RANDOM_DEVICE>
113 return Phase<PARAM>(std::uniform_real_distribution<>(param_type::minPosition(), param_type::maxPosition())(gen),
114 std::uniform_real_distribution<>(param_type::minSpeed(), param_type::maxSpeed())(gen));
// Clamps `speed` into [param_type::minSpeed(), param_type::maxSpeed()];
// a speed already inside the interval is left untouched.
117 void saturateSpeed(
void) {
118 if(speed < param_type::minSpeed())
119 speed = param_type::minSpeed();
120 else if(speed > param_type::maxSpeed())
121 speed = param_type::maxSpeed();
129 template<
typename MOUNTAIN_CAR_PARAM>
134 using param_type = MOUNTAIN_CAR_PARAM;
138 using action_type = Action;
139 using reward_type = double;
// Reports the current situation of the car through output arguments:
// position and speed are copied from current_state, and height is computed
// as heightOf(position).
// NOTE(review): the declarations of the `speed` and `height` arguments are
// not visible in this excerpt (presumably double& like `position`).
149 void location(
double& position,
152 position = current_state.position;
153 speed = current_state.speed;
154 height = heightOf(position);
// Height associated with a position: sin(3*position).
157 static double heightOf(
double position) {
158 return sin(3*position);
162 static double bottom(
void) {
168 current_state.check();
172 current_state.check();
173 return current_state;
// Advances the simulation by one time step under action `a`, updating
// current_state and the reward `r`.
// NOTE(review): several original lines are missing from this excerpt (the
// switch header, the per-case assignments to `aa`, the statement that follows
// the error message, and the operator joining the two goal-speed tests).
// The comments below describe only what is visible.
176 void timeStep(
const action_type& a) {
// One case per known action; the message assembled afterwards embeds the
// integer value of `a` (presumably for an unknown action; confirm).
180 case Action::actionForward:
183 case Action::actionBackward:
186 case Action::actionNone:
190 std::ostringstream ostr;
191 ostr <<
"mountain_car::Simulator::timeStep(" <<
static_cast<int>(a) <<
")";
// Speed update: action term 0.001*aa minus 0.0025*cos(3*position); cos(3x)
// is proportional to the derivative of sin(3x), the profile used by
// heightOf. The speed is then saturated before being added to the position.
195 current_state.speed += (0.001*aa - 0.0025*cos(3*current_state.position));
196 current_state.saturateSpeed();
197 current_state.position += current_state.speed;
// Default reward for a step; it may be overridden below.
199 r=param_type::rewardStep();
// Left bound: the position is clamped to minPosition and the speed is reset.
200 if(current_state.position < param_type::minPosition()) {
201 current_state.position = param_type::minPosition();
202 current_state.speed = 0;
// Beyond maxPosition: the goal reward is assigned when the speed satisfies
// the two tests below, i.e. lies in [goalSpeed, goalSpeed + goalSpeedMargin]
// assuming the tests are joined by && (the joining line is not visible).
204 else if(current_state.position > param_type::maxPosition()) {
206 if((current_state.speed >= param_type::goalSpeed())
208 (current_state.speed <= param_type::goalSpeed() + param_type::goalSpeedMargin())) {
209 r = param_type::rewardGoal();
218 reward_type reward(
void)
const {
222 Simulator(
void) : current_state(), r(0) {}
224 : current_state(copy.current_state),
230 current_state = copy.current_state;
239 template<
typename SIMULATOR>
// Dumps data sampled on a regular points_per_side x points_per_side grid
// covering the position and speed ranges of SIMULATOR::param_type.
// The outer loop walks speeds, the inner loop walks positions. Two output
// formats are visible below:
//   "position speed q(current,a) action"   and   "position speed action".
// NOTE(review): the choice between the two formats, the computation of `a`
// (presumably from `policy`) and several parameter declarations are on lines
// missing from this excerpt. Callers in this file pass true/false as the
// last argument, presumably selecting the format. Confirm.
// NOTE(review): the step sizes divide by (points_per_side-1), so
// points_per_side == 1 would divide by zero.
244 template<
typename Q,
typename POLICY>
245 static void Qdata(std::ostream& file,
247 const POLICY& policy,
251 double coef_p,coef_s;
252 double position,speed;
255 typename SIMULATOR::phase_type current;
// Grid steps so that index 0 maps to the minimum and index
// points_per_side-1 maps to the maximum of each range.
256 coef_p = (SIMULATOR::param_type::maxPosition()-SIMULATOR::param_type::minPosition())/((
double)(points_per_side-1));
257 coef_s = (SIMULATOR::param_type::maxSpeed()-SIMULATOR::param_type::minSpeed())/((
double)(points_per_side-1));
258 for(s=0;s<points_per_side;++s) {
259 speed = SIMULATOR::param_type::minSpeed() + coef_s*s;
260 for(p=0;p<points_per_side;++p) {
261 position = SIMULATOR::param_type::minPosition() + coef_p*p;
262 current =
typename SIMULATOR::phase_type(position,speed);
265 file << position << ' ' << speed << ' ' << q(current,a) << ' ' << static_cast<int>(a) << std::endl;
267 file << position << ' ' << speed << ' ' << static_cast<int>(a) << std::endl;
// Writes a gnuplot script that renders a 3D pm3d surface from four-column
// data (splot ... using 1:2:3:4).
//   title           : text placed in the plot title
//   file_prefix     : prefix for generated file names (its use is on a line
//                     not visible in this excerpt; confirm)
//   rank            : number appended to the prefix, zero-padded to width 6
//   policy          : forwarded to Qdata
//   points_per_side : grid resolution forwarded to Qdata (default 50)
// The script is written to "<numbered_prefix>.plot" and directs gnuplot to
// output "<numbered_prefix>.png". The data are inlined in the script ('-')
// by calling Qdata with `true` as last argument.
// NOTE(review): some declarations (q, file) and the test guarding the
// "Cannot open" message are on lines missing from this excerpt.
275 template<
typename Q,
typename POLICY>
276 static void drawQ(std::string title,
277 std::string file_prefix,
int rank,
279 const POLICY& policy,
280 int points_per_side=50) {
281 std::ostringstream ostr;
283 std::string numbered_prefix;
284 std::string filename;
// Numbered suffix: '-' followed by the rank on 6 digits, padded with zeros.
288 ostr <<
'-' << std::setfill(
'0') << std::setw(6) << rank;
289 numbered_prefix = ostr.str();
290 filename = numbered_prefix +
".plot";
292 file.open(filename.c_str());
294 std::cerr <<
"Cannot open \"" << filename <<
"\". Plotting skipped." << std::endl;
// Script header: axis ranges come from the simulator parameters; the color
// box label documents the integer code of each action.
297 file <<
"unset hidden3d;" << std::endl
298 <<
"set xrange [" << SIMULATOR::param_type::minPosition()
299 <<
":" << SIMULATOR::param_type::maxPosition() <<
"];" << std::endl
300 <<
"set yrange [" << SIMULATOR::param_type::minSpeed()
301 <<
":" << SIMULATOR::param_type::maxSpeed() <<
"];" << std::endl
302 <<
"set zrange [-1:1.5];" << std::endl
303 <<
"set cbrange [0:2];" << std::endl
304 <<
"set view 48,336;" << std::endl
305 <<
"set palette defined ( 0 \"yellow\", 1 \"red\",2 \"blue\");" << std::endl
306 <<
"set ticslevel 0;" << std::endl
307 <<
"set title \"" << title <<
"\";" << std::endl
308 <<
"set xlabel \"position\";" << std::endl
309 <<
"set ylabel \"speed\";" << std::endl
310 <<
"set zlabel \"Q(max_a)\";" << std::endl
311 <<
"set cblabel \"none=" <<
static_cast<int>(Action::actionNone)
312 <<
", forward=" <<
static_cast<int>(Action::actionForward)
313 <<
", backward=" <<
static_cast<int>(Action::actionBackward)
314 <<
"\";" << std::endl
315 <<
"set style line 100 linecolor rgb \"black\";" << std::endl
316 <<
"set pm3d at s hidden3d 100;" << std::endl
317 <<
"set output \"" << numbered_prefix <<
".png\";" << std::endl
318 <<
"set term png enhanced size 600,400;"<< std::endl
319 <<
"splot '-' using 1:2:3:4 with pm3d notitle;" << std::endl;
// Inline data block read by "splot '-'".
323 Qdata(file,q,policy,points_per_side,
true);
325 std::cout <<
"\"" << filename <<
"\" generated." << std::endl;
// Plots a policy map together with the trajectory of one episode.
// NOTE(review): the line carrying the function name and its first parameter
// is missing from this excerpt, as are a few declarations (q, file, cumrew)
// and the bodies of some loops. The comments below describe only what is
// visible.
//   file_prefix        : prefix for generated file names (its use is on a
//                        line not visible here; confirm)
//   rank               : number appended to the prefix, zero-padded to 6
//   simulator          : simulator on which the episode is run
//   policy             : policy driving the episode
//   max_episode_length : presumably the episode length bound given to
//                        rl::episode::run (that argument is not visible)
//   points_per_side    : grid resolution for the policy map (default 50)
// Two file names are derived: "<numbered_prefix>.plot" for the gnuplot
// script and "<numbered_prefix>-policy.data" for the policy grid that the
// script references in its splot command.
331 template<
typename Q,
typename POLICY>
333 std::string file_prefix,
int rank,
334 SIMULATOR& simulator,
336 const POLICY& policy,
337 unsigned int max_episode_length,
338 int points_per_side=50) {
339 std::ostringstream ostr;
340 std::ostringstream titleostr;
342 std::string numbered_prefix;
343 std::string filename,policyfilename;
// Numbered suffix: '-' followed by the rank on 6 digits, padded with zeros.
350 ostr <<
'-' << std::setfill(
'0') << std::setw(6) << rank;
351 numbered_prefix = ostr.str();
352 filename = numbered_prefix +
".plot";
353 policyfilename = numbered_prefix +
"-policy.data";
355 file.open(filename.c_str());
357 std::cerr <<
"Cannot open \"" << filename <<
"\". Plotting skipped." << std::endl;
// Run an episode and record one (state, reward) pair per transition; both
// lambdas keep only the state and the reward of what they receive.
361 std::vector<std::pair<typename SIMULATOR::phase_type,typename SIMULATOR::reward_type>> transitions;
362 rl::episode::run(simulator,policy,
363 std::back_inserter(transitions),
364 [](
const typename SIMULATOR::phase_type& s,
365 const typename SIMULATOR::action_type& a,
366 const typename SIMULATOR::reward_type r,
367 const typename SIMULATOR::phase_type& s_)
368 -> std::pair<typename SIMULATOR::phase_type,typename SIMULATOR::reward_type> {
return std::make_pair(s,r);},
369 [](
const typename SIMULATOR::phase_type& s,
370 const typename SIMULATOR::action_type& a,
371 const typename SIMULATOR::reward_type r)
372 -> std::pair<typename SIMULATOR::phase_type,typename SIMULATOR::reward_type> {
return std::make_pair(s,r);},
// NOTE(review): the body of this loop is not visible; presumably it
// accumulates the rewards into cumrew, which is shown in the title.
376 for(
auto& t : transitions)
379 titleostr << title <<
"\\n cumulated reward = " << cumrew;
// Script header: axis ranges come from the simulator parameters; the plot is
// a 2D map view of the policy file plus the inline trajectory points.
381 file <<
"set xrange [" << SIMULATOR::param_type::minPosition()
382 <<
":" << SIMULATOR::param_type::maxPosition() <<
"];" << std::endl
383 <<
"set yrange [" << SIMULATOR::param_type::minSpeed()
384 <<
":" << SIMULATOR::param_type::maxSpeed() <<
"];" << std::endl
385 <<
"set zrange [0:3];" << std::endl
386 <<
"set cbrange [0:3];" << std::endl
387 <<
"set title \"" << titleostr.str() <<
"\";" << std::endl
388 <<
"set palette defined ( 0 \"yellow\", 1 \"red\",2 \"blue\", 3 \"black\");" << std::endl
389 <<
"set xlabel \"position\";" << std::endl
390 <<
"set ylabel \"speed\";" << std::endl
391 <<
"set cblabel \"none=" <<
static_cast<int>(Action::actionNone)
392 <<
", forward=" <<
static_cast<int>(Action::actionForward)
393 <<
", backward=" <<
static_cast<int>(Action::actionBackward)
394 <<
"\";" << std::endl
395 <<
"set view map;" << std::endl
396 <<
"set pm3d at s;" << std::endl
397 <<
"splot '" << policyfilename <<
"' with pm3d notitle, \\" << std::endl
398 <<
" '-' with linespoints notitle pt 7 ps 0.5 lc rgb \"black\"" << std::endl;
// Inline trajectory data: one line per recorded transition, starting with
// position and speed (the remaining columns are on a line not visible here).
400 for(
auto& t : transitions)
401 file << t.first.position <<
' '
402 << t.first.speed <<
' '
405 std::cout <<
"\"" << filename <<
"\" generated." << std::endl;
// Second output file: the policy grid that the script's splot command reads.
// NOTE(review): the test guarding the error message and the statements
// between these lines are not visible in this excerpt.
408 file.open(policyfilename.c_str());
// Report the file that actually failed to open: the policy data file, not
// the script file.
410 std::cerr <<
"Cannot open \"" << policyfilename <<
"\". Plotting skipped." << std::endl;
// `false` as last argument of Qdata, as opposed to `true` for the inline
// data of the Q surface plot.
413 Qdata(file,q,policy,points_per_side,
false);
415 std::cout <<
"\"" << policyfilename <<
"\" generated." << std::endl;