The example below is based on OpenAI's CartPole example, the "Hello World" of reinforcement learning. We have adjusted it to implement the Pathmind interface.

This is just one way to implement the interface; Pathmind is flexible enough that you can structure your implementation however you see fit.
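At a glance, the interface consists of the methods implemented in the full example that follows. The skeleton below is only a summary of those methods as they appear in this example, not an exhaustive description of the nativerl API:

class MyEnvironment(nativerl.Environment):
    def reset(self): ...                           # reinitialize the simulation
    def getNumberOfAgents(self): ...               # number of agents Pathmind controls
    def getObservationSpace(self): ...             # shape and bounds of observations
    def getObservation(self, agentId): ...         # current observation for an agent
    def getReward(self, agentId): ...              # reward signal for an agent
    def isDone(self, agentId): ...                 # episode termination condition
    def getActionSpace(self, i): ...               # action space for agent i
    def setNextAction(self, action, agentId): ...  # receive the chosen action
    def step(self): ...                            # advance the simulation one tick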

import math
import random

import nativerl


class PathmindEnvironment(nativerl.Environment):
    # Initialize Simulation Parameters
    def __init__(self):
        nativerl.Environment.__init__(self)
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'  # only the Euler update is implemented in step()
        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

    # Reset Simulation
    def reset(self):
        # Four small random values: cart position, cart velocity, pole angle, pole angular velocity
        self.state = [random.uniform(-0.05, 0.05) for _ in range(4)]
        self.steps = 0
        self.steps_beyond_done = None

    # Set Number of Agents
    def getNumberOfAgents(self):
        return 1

    # Define Observation Space
    def getObservationSpace(self):
        return nativerl.Continuous(nativerl.FloatVector([-math.inf]), nativerl.FloatVector([math.inf]), nativerl.SSizeTVector([4]))

    # Get Current Observation
    def getObservation(self, agentId):
        return nativerl.Array(nativerl.FloatVector(self.state))

    # Define Rewards
    def getReward(self, agentId):
        if not self.isDone(agentId):
            reward = 1.0
        elif self.steps_beyond_done is None:
            # Pole just fell!
            self.steps_beyond_done = 0
            reward = 1.0
        else:
            self.steps_beyond_done += 1
            reward = 0.0
        return reward

    # Define Done Condition
    def isDone(self, agentId):
        x, x_dot, theta, theta_dot = self.state
        return bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
            or self.steps > 1000
        )

    # Define Action Space
    def getActionSpace(self, i):
        return nativerl.Discrete(2) if i == 0 else None

    # Execute Action
    def setNextAction(self, action, agentId):
        self.action = action.values()[0]

    # Define Steps
    def step(self):
        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if self.action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)
        temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass
        x = x + self.tau * x_dot
        x_dot = x_dot + self.tau * xacc
        theta = theta + self.tau * theta_dot
        theta_dot = theta_dot + self.tau * thetaacc
        self.state = [x, x_dot, theta, theta_dot]
        self.steps += 1
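During training, Pathmind's backend calls these methods for you, so you do not invoke them directly. If you want to sanity-check the environment locally, a rough manual rollout might look like the sketch below. The way the action is wrapped in nativerl.Array and nativerl.FloatVector is an assumption mirroring how setNextAction() reads action.values()[0] above:

import random
import nativerl

env = PathmindEnvironment()
env.reset()

while not env.isDone(0):
    # Pick a random action (0 or 1). Wrapping it the same way getObservation()
    # wraps the state is an assumption about how nativerl expects actions.
    action = nativerl.Array(nativerl.FloatVector([float(random.randint(0, 1))]))
    env.setNextAction(action, 0)
    env.step()
    observation = env.getObservation(0)
    reward = env.getReward(0)

print("Episode finished after", env.steps, "steps")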