episodes = generate_training_episodes(num_episodes)
for episode in episodes:
for state, action, reward, next_state in episode:
# Get the available actions for the next state
next_available_actions = get_available_actions(next_state)
# Calculate the maximum Q-value for the next state
next_q_values = [Q_table[(next_state, next_action)] for next_action in next_available_actions]
max_next_q_value = max(next_q_values)
# Update the Q-value for the current state-action pair
q_value = Q_table[(state, action)]
Q_table[(state, action)] = q_value + ALPHA * (reward + GAMMA * max_next_q_value - q_value)
上記のコードでは値エラーが発生します
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-7df231fe06a5> in <cell line: 1>()
----> 1 episodes = generate_training_episodes(num_episodes)
2 for episode in episodes:
3 for state, action, reward, next_state in episode:
4 # Get the available actions for the next state
5 next_available_actions = get_available_actions(next_state)
1 frames
<ipython-input-15-84c03145c31c> in transition_function(state, action)
54
55 elif action.source.startswith("W"):
---> 56 source_index = int(source_id) - 1
57 new_wholesaler_inventories[source_index] -= action.quantity
58 if destination_prefix == "LM":
ValueError: invalid literal for int() with base 10: ''
これは遷移関数の問題であり、次のように定義しました。
def transition_function(state, action):
new_farmer_inventories = state.farmer_inventories.copy()
new_local_trader_inventories = state.local_trader_inventories.copy()
new_ppc_inventories = state.ppc_inventories.copy()
new_wholesaler_inventories = state.wholesaler_inventories.copy()
new_ripening_storage_inventories = state.ripening_storage_inventories.copy()
new_local_market_demands = state.local_market_demands.copy()
new_farmers_market_demands = state.farmers_market_demands.copy()
new_retailer_demands = state.retailer_demands.copy()
source_id = action.source[1:] if action.source.startswith("F") else \
action.source[2:] if action.source.startswith(("LT", "PPC", "W", "RS")) else ""
destination_prefix = action.destination[:2] if action.destination.startswith(("LM", "FM")) else \
action.destination[0] if action.destination.startswith(("F", "R")) else ""
destination_id = action.destination[2:] if action.destination.startswith(("PPC", "LT", "W")) else \
action.destination[1:] if action.destination.startswith(("F", "R")) else \
action.destination[3:] if action.destination.startswith(("LM", "FM")) else ""
# Update inventory levels
if action.source.startswith("F"):
source_index = int(source_id) - 1
new_farmer_inventories[source_index] -= action.quantity
if action.destination.startswith("LT"):
destination_index = int(destination_id) - 1
new_local_trader_inventories[destination_index] += action.quantity
elif action.destination.startswith("PPC"):
destination_index = int(destination_id) - 1
new_ppc_inventories[destination_index] += action.quantity
elif action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
elif action.source.startswith("LT"):
source_index = int(source_id) - 1
new_local_trader_inventories[source_index] -= action.quantity
if action.destination.startswith("PPC"):
destination_index = int(destination_id) - 1
new_ppc_inventories[destination_index] += action.quantity
elif action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
elif action.source.startswith("PPC"):
source_index = int(source_id) - 1
new_ppc_inventories[source_index] -= action.quantity
if action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
elif action.destination.startswith("RS"):
destination_index = int(destination_id) - 1
new_ripening_storage_inventories[destination_index] += action.quantity
elif action.source.startswith("W"):
source_index = int(source_id) - 1
new_wholesaler_inventories[source_index] -= action.quantity
if destination_prefix == "LM":
destination_index = int(destination_id) - 1
new_local_market_demands[destination_index] = max(new_local_market_demands[destination_index] - action.quantity, 0)
elif destination_prefix == "FM":
destination_index = int(destination_id) - 1
new_farmers_market_demands[destination_index] = max(new_farmers_market_demands[destination_index] - action.quantity, 0)
elif destination_prefix == "R":
destination_index = int(destination_id) - 1
new_retailer_demands[destination_index] = max(new_retailer_demands[destination_index] - action.quantity, 0)
elif action.source.startswith("RS"):
source_index = int(source_id) - 1
new_ripening_storage_inventories[source_index] -= action.quantity
if action.destination.startswith("W"):
destination_index = int(destination_id) - 1
new_wholesaler_inventories[destination_index] += action.quantity
new_state = State(
new_farmer_inventories,
new_local_trader_inventories,
new_ppc_inventories,
new_wholesaler_inventories,
new_ripening_storage_inventories,
new_local_market_demands,
new_farmers_market_demands,
new_retailer_demands,
state.transportation_costs,
state.handling_costs,
)
return new_state
問題は、エピソードを実行できないことです。問題がどこにあるのかわかりません。
それで私は上記を試しました、実際にはQ学習を使用してマルコフ決定プロセスを作成しようとしていますが、エピソードで訓練しようとすると立ち往生します。