ValueError: 基数 10 の int() のリテラルが無効です: Q ラーニングの ’ [クローズ]

概要

episodes = generate_training_episodes(num_episodes)
for episode in episodes:
    for state, action, reward, next_state in episode:
        # Get the available actions for the next state
        next_available_actions = get_available_actions(next_state)

        # Calculate the maximum Q-value for the next state
        next_q_values = [Q_table[(next_state, next_action)] for next_action in next_available_actions]
        max_next_q_value = max(next_q_values)

        # Update the Q-value for the current state-action pair
        q_value = Q_table[(state, action)]
        Q_table[(state, action)] = q_value + ALPHA * (reward + GAMMA * max_next_q_value - q_value)

上記のコードでは値エラーが発生します

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-19-7df231fe06a5> in <cell line: 1>()
----> 1 episodes = generate_training_episodes(num_episodes)
      2 for episode in episodes:
      3     for state, action, reward, next_state in episode:
      4         # Get the available actions for the next state
      5         next_available_actions = get_available_actions(next_state)

1 frames
<ipython-input-15-84c03145c31c> in transition_function(state, action)
     54 
     55     elif action.source.startswith("W"):
---> 56         source_index = int(source_id) - 1
     57         new_wholesaler_inventories[source_index] -= action.quantity
     58         if destination_prefix == "LM":

ValueError: invalid literal for int() with base 10: ''

これは遷移関数の問題であり、次のように定義しました。

def transition_function(state, action):
    new_farmer_inventories = state.farmer_inventories.copy()
    new_local_trader_inventories = state.local_trader_inventories.copy()
    new_ppc_inventories = state.ppc_inventories.copy()
    new_wholesaler_inventories = state.wholesaler_inventories.copy()
    new_ripening_storage_inventories = state.ripening_storage_inventories.copy()
    new_local_market_demands = state.local_market_demands.copy()
    new_farmers_market_demands = state.farmers_market_demands.copy()
    new_retailer_demands = state.retailer_demands.copy()

    source_id = action.source[1:] if action.source.startswith("F") else \
                action.source[2:] if action.source.startswith(("LT", "PPC", "W", "RS")) else ""

    destination_prefix = action.destination[:2] if action.destination.startswith(("LM", "FM")) else \
                         action.destination[0] if action.destination.startswith(("F", "R")) else ""

    destination_id = action.destination[2:] if action.destination.startswith(("PPC", "LT", "W")) else \
                     action.destination[1:] if action.destination.startswith(("F", "R")) else \
                     action.destination[3:] if action.destination.startswith(("LM", "FM")) else ""

    # Update inventory levels
    if action.source.startswith("F"):
        source_index = int(source_id) - 1
        new_farmer_inventories[source_index] -= action.quantity
        if action.destination.startswith("LT"):
            destination_index = int(destination_id) - 1
            new_local_trader_inventories[destination_index] += action.quantity
        elif action.destination.startswith("PPC"):
            destination_index = int(destination_id) - 1
            new_ppc_inventories[destination_index] += action.quantity
        elif action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity

    elif action.source.startswith("LT"):
        source_index = int(source_id) - 1
        new_local_trader_inventories[source_index] -= action.quantity
        if action.destination.startswith("PPC"):
            destination_index = int(destination_id) - 1
            new_ppc_inventories[destination_index] += action.quantity
        elif action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity

    elif action.source.startswith("PPC"):
        source_index = int(source_id) - 1
        new_ppc_inventories[source_index] -= action.quantity
        if action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity
        elif action.destination.startswith("RS"):
            destination_index = int(destination_id) - 1
            new_ripening_storage_inventories[destination_index] += action.quantity

    elif action.source.startswith("W"):
        source_index = int(source_id) - 1
        new_wholesaler_inventories[source_index] -= action.quantity
        if destination_prefix == "LM":
            destination_index = int(destination_id) - 1
            new_local_market_demands[destination_index] = max(new_local_market_demands[destination_index] - action.quantity, 0)
        elif destination_prefix == "FM":
            destination_index = int(destination_id) - 1
            new_farmers_market_demands[destination_index] = max(new_farmers_market_demands[destination_index] - action.quantity, 0)
        elif destination_prefix == "R":
            destination_index = int(destination_id) - 1
            new_retailer_demands[destination_index] = max(new_retailer_demands[destination_index] - action.quantity, 0)

    elif action.source.startswith("RS"):
        source_index = int(source_id) - 1
        new_ripening_storage_inventories[source_index] -= action.quantity
        if action.destination.startswith("W"):
            destination_index = int(destination_id) - 1
            new_wholesaler_inventories[destination_index] += action.quantity

    new_state = State(
        new_farmer_inventories,
        new_local_trader_inventories,
        new_ppc_inventories,
        new_wholesaler_inventories,
        new_ripening_storage_inventories,
        new_local_market_demands,
        new_farmers_market_demands,
        new_retailer_demands,
        state.transportation_costs,
        state.handling_costs,
    )

    return new_state

問題は、エピソードを実行できないことです。問題がどこにあるのかわかりません。

それで私は上記を試しました、実際にはQ学習を使用してマルコフ決定プロセスを作成しようとしていますが、エピソードで訓練しようとすると立ち往生します。